//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
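//
// As an illustration of the reg_def syntax described above, the first
// definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares word (a) of xmm0 as save-on-call for both the allocator and the
// C calling convention, spilled as a float (Op_RegF), with hardware
// encoding 0, backed by the VM-level register xmm0. The ->next(n)
// definitions name the remaining 32-bit words (b)-(p) of the same register.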
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
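
// Note on the chunk above: an alloc_class groups the registers into a chunk
// for the register allocator; the listing order presumably also fixes the
// order in which the allocator enumerates them.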

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
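
// A reg_class_dynamic resolves to one of its two member classes when the
// matcher is set up; roughly (a sketch, not literal code):
//   float_reg = VM_Version::supports_evex() ? float_reg_evex : float_reg_legacy;
// The same pattern repeats for the double and vector classes below.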

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 32-bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for EVEX 32-bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 64-bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for EVEX 64-bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 128-bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for EVEX 128-bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 256-bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for EVEX 256-bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512-bit vector registers
reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                      XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);
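
// The three classes above (xmm0_reg/ymm0_reg/zmm0_reg) pin an operand to
// register 0 at 128/256/512-bit width. Such fixed classes are typically
// needed by instructions that use XMM0 as an implicit operand (the non-VEX
// SSE4.1 blendv forms, for example); the concrete users live elsewhere in
// this file.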
rest of the architecture description 1080 1081 source_hpp %{ 1082 // Header information of the source block. 1083 // Method declarations/definitions which are used outside 1084 // the ad-scope can conveniently be defined here. 1085 // 1086 // To keep related declarations/definitions/uses close together, 1087 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1088 1089 class NativeJump; 1090 1091 class CallStubImpl { 1092 1093 //-------------------------------------------------------------- 1094 //---< Used for optimization in Compile::shorten_branches >--- 1095 //-------------------------------------------------------------- 1096 1097 public: 1098 // Size of call trampoline stub. 1099 static uint size_call_trampoline() { 1100 return 0; // no call trampolines on this platform 1101 } 1102 1103 // Number of relocations needed by a call trampoline stub. 1104 static uint reloc_call_trampoline() { 1105 return 0; // no call trampolines on this platform 1106 } 1107 }; 1108 1109 class HandlerImpl { 1110 1111 public: 1112 1113 static int emit_exception_handler(CodeBuffer &cbuf); 1114 static int emit_deopt_handler(CodeBuffer& cbuf); 1115 1116 static uint size_exception_handler() { 1117 // NativeCall instruction size is the same as NativeJump. 1118 // The exception handler starts out as a jump and can be patched to 1119 // a call by deoptimization. (4932387) 1120 // Note that this value is also credited (in output.cpp) to 1121 // the size of the code section. 1122 return NativeJump::instruction_size; 1123 } 1124 1125 #ifdef _LP64 1126 static uint size_deopt_handler() { 1127 // three 5 byte instructions 1128 return 15; 1129 } 1130 #else 1131 static uint size_deopt_handler() { 1132 // NativeCall instruction size is the same as NativeJump. 1133 // The deopt handler starts out as a jump and can be patched to 1134 // a call by deoptimization. (4932387) 1135 // Note that this value is also credited (in output.cpp) to 1136 // the size of the code section. 1137 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1138 } 1139 #endif 1140 }; 1141 1142 %} // end source_hpp 1143 1144 source %{ 1145 1146 #include "opto/addnode.hpp" 1147 1148 // Emit exception handler code. 1149 // Stuff framesize into a register and call a VM stub routine. 1150 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1151 1152 // Note that the code buffer's insts_mark is always relative to insts. 1153 // That's why we must use the macroassembler to generate a handler. 1154 MacroAssembler _masm(&cbuf); 1155 address base = __ start_a_stub(size_exception_handler()); 1156 if (base == NULL) { 1157 ciEnv::current()->record_failure("CodeCache is full"); 1158 return 0; // CodeBuffer::expand failed 1159 } 1160 int offset = __ offset(); 1161 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1162 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1163 __ end_a_stub(); 1164 return offset; 1165 } 1166 1167 // Emit deopt handler code. 1168 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1169 1170 // Note that the code buffer's insts_mark is always relative to insts. 1171 // That's why we must use the macroassembler to generate a handler.
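// The handler emitted below saves the address of the deopt site ("the_pc")
// on the stack and then jumps to the deopt blob's unpack entry. On LP64 this
// must be done without clobbering any register, since all of them may be
// live: a near call pushes the address of the following instruction, which
// is then adjusted in place until it equals "the_pc".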
1172 MacroAssembler _masm(&cbuf); 1173 address base = __ start_a_stub(size_deopt_handler()); 1174 if (base == NULL) { 1175 ciEnv::current()->record_failure("CodeCache is full"); 1176 return 0; // CodeBuffer::expand failed 1177 } 1178 int offset = __ offset(); 1179 1180 #ifdef _LP64 1181 address the_pc = (address) __ pc(); 1182 Label next; 1183 // push a "the_pc" on the stack without destroying any registers 1184 // as they all may be live. 1185 1186 // push address of "next" 1187 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1188 __ bind(next); 1189 // adjust it so it matches "the_pc" 1190 __ subptr(Address(rsp, 0), __ offset() - offset); 1191 #else 1192 InternalAddress here(__ pc()); 1193 __ pushptr(here.addr()); 1194 #endif 1195 1196 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1197 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1198 __ end_a_stub(); 1199 return offset; 1200 } 1201 1202 1203 //============================================================================= 1204 1205 // Float masks come from different places depending on platform. 1206 #ifdef _LP64 1207 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1208 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1209 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1210 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1211 static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); } 1212 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); } 1213 static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); } 1214 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); } 1215 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1216 static address vector_byte_bitset() { return StubRoutines::x86::vector_byte_bitset(); } 1217 static address vector_long_perm_mask() { return StubRoutines::x86::vector_long_perm_mask(); } 1218 static address vector_byte_saturationmask() { return StubRoutines::x86::vector_byte_saturation_mask(); } 1219 #else 1220 static address float_signmask() { return (address)float_signmask_pool; } 1221 static address float_signflip() { return (address)float_signflip_pool; } 1222 static address double_signmask() { return (address)double_signmask_pool; } 1223 static address double_signflip() { return (address)double_signflip_pool; } 1224 #endif 1225 1226 1227 const bool Matcher::match_rule_supported(int opcode) { 1228 if (!has_match_rule(opcode)) 1229 return false; 1230 1231 bool ret_value = true; 1232 switch (opcode) { 1233 case Op_PopCountI: 1234 case Op_PopCountL: 1235 if (!UsePopCountInstruction) 1236 ret_value = false; 1237 break; 1238 case Op_MulVB: 1239 case Op_MulVI: 1240 case Op_MulVL: 1241 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1242 ret_value = false; 1243 break; 1244 case Op_MulReductionVL: 1245 if (VM_Version::supports_avx512dq() == false) 1246 ret_value = false; 1247 break; 1248 case Op_AddReductionVL: 1249 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1250 ret_value = false; 1251 break; 1252 case Op_AddReductionVI: 1253 if (UseSSE < 3) // requires at least SSE3 1254 ret_value = false; 1255 break; 1256 case Op_MulReductionVI: 1257 if (UseSSE < 4) // requires at least SSE4 1258 
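// (UseSSE >= 4 enables the SSE4.1 feature bits; the int mul reduction
// sequence presumably relies on the SSE4.1 pmulld instruction, hence
// this guard.)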
ret_value = false; 1259 break; 1260 case Op_AddReductionVF: 1261 case Op_AddReductionVD: 1262 case Op_MulReductionVF: 1263 case Op_MulReductionVD: 1264 if (UseSSE < 1) // requires at least SSE 1265 ret_value = false; 1266 break; 1267 case Op_SqrtVD: 1268 case Op_SqrtVF: 1269 if (UseAVX < 1) // enabled for AVX only 1270 ret_value = false; 1271 break; 1272 case Op_CompareAndSwapL: 1273 #ifdef _LP64 1274 case Op_CompareAndSwapP: 1275 #endif 1276 if (!VM_Version::supports_cx8()) 1277 ret_value = false; 1278 break; 1279 case Op_CMoveVF: 1280 case Op_CMoveVD: 1281 if (UseAVX < 1 || UseAVX > 2) 1282 ret_value = false; 1283 break; 1284 case Op_StrIndexOf: 1285 if (!UseSSE42Intrinsics) 1286 ret_value = false; 1287 break; 1288 case Op_StrIndexOfChar: 1289 if (!UseSSE42Intrinsics) 1290 ret_value = false; 1291 break; 1292 case Op_OnSpinWait: 1293 if (VM_Version::supports_on_spin_wait() == false) 1294 ret_value = false; 1295 break; 1296 } 1297 1298 return ret_value; // By default, match rules are supported. 1299 } 1300 1301 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1302 // Identify extra cases that we might want to provide match rules for, 1303 // e.g. Op_* vector nodes and other intrinsics, while guarding with vlen. 1304 bool ret_value = match_rule_supported(opcode); 1305 if (ret_value) { 1306 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1307 if (!vector_size_supported(bt, vlen)) { 1308 ret_value = false; 1309 } else if (size_in_bits > 256 && UseAVX <= 2) { 1310 // Only AVX512 supports 512-bit vectors 1311 ret_value = false; 1312 } else if (UseAVX == 0 && size_in_bits > 128) { 1313 // Only AVX supports 256-bit vectors 1314 ret_value = false; 1315 } else if (is_subword_type(bt) && size_in_bits == 512 && VM_Version::supports_avx512bw() == false) { 1316 // 512-bit Byte and Short vectors are supported only when AVX512BW is available.
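// For example, a 64-element byte vector has size_in_bits = 64 * 1 * 8 = 512
// and is therefore rejected here unless AVX512BW is present.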
1317 ret_value = false; 1318 } else { 1319 switch (opcode) { 1320 case Op_AddVB: 1321 case Op_SubVB: 1322 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1323 ret_value = false; 1324 break; 1325 case Op_URShiftVS: 1326 case Op_RShiftVS: 1327 case Op_LShiftVS: 1328 case Op_MulVS: 1329 case Op_AddVS: 1330 case Op_SubVS: 1331 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1332 ret_value = false; 1333 break; 1334 case Op_CMoveVF: 1335 if (vlen != 8) 1336 ret_value = false; 1337 break; 1338 case Op_CMoveVD: 1339 if (vlen != 4) 1340 ret_value = false; 1341 break; 1342 case Op_VectorMaskCmp: 1343 if (UseAVX <= 0) { ret_value = false; } 1344 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } 1345 break; 1346 case Op_VectorBlend: 1347 if (UseSSE <= 3 && UseAVX == 0) { ret_value = false; } 1348 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } 1349 break; 1350 case Op_VectorTest: 1351 if (UseAVX <= 0) { ret_value = false; } 1352 else if (size_in_bits != 128 && size_in_bits != 256) { ret_value = false; } // Implementation limitation 1353 break; 1354 case Op_VectorLoadMask: 1355 if (UseSSE <= 3) { ret_value = false; } 1356 else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation 1357 break; 1358 case Op_VectorStoreMask: 1359 if (UseAVX < 2) { ret_value = false; } // Implementation limitation 1360 else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation 1361 else if (size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; } // Implementation limitation 1362 break; 1363 default: 1364 break; 1365 } 1366 } 1367 } 1368 if (ret_value) { 1369 assert(is_java_primitive(bt) && (vlen > 0) && is_power_of_2(vlen) && 1370 vector_size_supported(bt, vlen), "must be supported"); 1371 } 1372 1373 return ret_value; // By default, match rules are supported. 1374 } 1375 1376 const bool Matcher::has_predicated_vectors(void) { 1377 bool ret_value = false; 1378 if (UseAVX > 2) { 1379 ret_value = VM_Version::supports_avx512vl(); 1380 } 1381 1382 return ret_value; 1383 } 1384 1385 const int Matcher::float_pressure(int default_pressure_threshold) { 1386 int float_pressure_threshold = default_pressure_threshold; 1387 #ifdef _LP64 1388 if (UseAVX > 2) { 1389 // Increase pressure threshold on machines with AVX3 which have 1390 // twice as many XMM registers. 1391 float_pressure_threshold = default_pressure_threshold * 2; 1392 } 1393 #endif 1394 return float_pressure_threshold; 1395 } 1396 1397 // Max vector size in bytes. 0 if not supported. 1398 const int Matcher::vector_width_in_bytes(BasicType bt) { 1399 assert(is_java_primitive(bt), "only primitive type vectors"); 1400 if (UseSSE < 2) return 0; 1401 // SSE2 supports 128bit vectors for all types. 1402 // AVX2 supports 256bit vectors for all types. 1403 // EVEX (AVX-512) supports 512bit vectors for all types. 1404 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1405 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1406 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1407 size = (UseAVX > 2) ? 64 : 32; 1408 // Use flag to limit vector size. 1409 size = MIN2(size,(int)MaxVectorSize); 1410 // Minimum 2 values in vector (or 4 for bytes).
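// Worked example: with UseAVX == 2, size = (1 << 2) * 8 = 32 bytes (256 bits);
// with UseAVX == 0 and SSE2 or better, size = 16. The switch below then
// rejects widths too small to hold the minimum element count for the type.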
1411 switch (bt) { 1412 case T_DOUBLE: 1413 case T_LONG: 1414 if (size < 16) return 0; 1415 break; 1416 case T_FLOAT: 1417 case T_INT: 1418 if (size < 8) return 0; 1419 break; 1420 case T_BOOLEAN: 1421 if (size < 4) return 0; 1422 break; 1423 case T_CHAR: 1424 if (size < 4) return 0; 1425 break; 1426 case T_BYTE: 1427 if (size < 4) return 0; 1428 break; 1429 case T_SHORT: 1430 if (size < 4) return 0; 1431 break; 1432 default: 1433 ShouldNotReachHere(); 1434 } 1435 return size; 1436 } 1437 1438 // Limits on vector size (number of elements) loaded into vector. 1439 const int Matcher::max_vector_size(const BasicType bt) { 1440 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1441 } 1442 const int Matcher::min_vector_size(const BasicType bt) { 1443 int max_size = max_vector_size(bt); 1444 // Min size which can be loaded into vector is 4 bytes. 1445 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1446 return MIN2(size,max_size); 1447 } 1448 1449 // Vector ideal reg corresponding to specified size in bytes. 1450 const uint Matcher::vector_ideal_reg(int size) { 1451 assert(MaxVectorSize >= size, ""); 1452 switch(size) { 1453 case 4: return Op_VecS; 1454 case 8: return Op_VecD; 1455 case 16: return Op_VecX; 1456 case 32: return Op_VecY; 1457 case 64: return Op_VecZ; 1458 } 1459 ShouldNotReachHere(); 1460 return 0; 1461 } 1462 1463 // Only the lowest bits of an xmm reg are used for the vector shift count. 1464 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1465 return Op_VecS; 1466 } 1467 1468 // x86 supports misaligned vector loads and stores. 1469 const bool Matcher::misaligned_vectors_ok() { 1470 return !AlignVector; // can be changed by flag 1471 } 1472 1473 // x86 AES instructions are compatible with SunJCE expanded 1474 // keys, hence we do not need to pass the original key to stubs. 1475 const bool Matcher::pass_original_key_for_aes() { 1476 return false; 1477 } 1478 1479 1480 const bool Matcher::convi2l_type_required = true; 1481 1482 // Check for shift by small constant as well. 1483 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1484 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1485 shift->in(2)->get_int() <= 3 && 1486 // Are there other uses besides address expressions? 1487 !matcher->is_visited(shift)) { 1488 address_visited.set(shift->_idx); // Flag as address_visited 1489 mstack.push(shift->in(2), Matcher::Visit); 1490 Node *conv = shift->in(1); 1491 #ifdef _LP64 1492 // Allow the Matcher to match the rule which bypasses 1493 // the ConvI2L operation for an array index on LP64 1494 // if the index value is positive. 1495 if (conv->Opcode() == Op_ConvI2L && 1496 conv->as_Type()->type()->is_long()->_lo >= 0 && 1497 // Are there other uses besides address expressions? 1498 !matcher->is_visited(conv)) { 1499 address_visited.set(conv->_idx); // Flag as address_visited 1500 mstack.push(conv->in(1), Matcher::Pre_Visit); 1501 } else 1502 #endif 1503 mstack.push(conv, Matcher::Pre_Visit); 1504 return true; 1505 } 1506 return false; 1507 } 1508 1509 // Should the Matcher clone shifts on addressing modes, expecting them 1510 // to be subsumed into complex addressing expressions or compute them 1511 // into registers?
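// For example, an address like base + (index << 2) + 16 can be folded into a
// single x86 addressing mode, [base + index*4 + 16], instead of first
// computing the scaled index into a register.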
1512 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1513 Node *off = m->in(AddPNode::Offset); 1514 if (off->is_Con()) { 1515 address_visited.test_set(m->_idx); // Flag as address_visited 1516 Node *adr = m->in(AddPNode::Address); 1517 1518 // Intel can handle 2 adds in an addressing mode. 1519 // AtomicAdd is not an addressing expression. 1520 // Cheap to find it by looking for a screwy base. 1521 if (adr->is_AddP() && 1522 !adr->in(AddPNode::Base)->is_top() && 1523 // Are there other uses besides address expressions? 1524 !is_visited(adr)) { 1525 address_visited.set(adr->_idx); // Flag as address_visited 1526 Node *shift = adr->in(AddPNode::Offset); 1527 if (!clone_shift(shift, this, mstack, address_visited)) { 1528 mstack.push(shift, Pre_Visit); 1529 } 1530 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1531 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1532 } else { 1533 mstack.push(adr, Pre_Visit); 1534 } 1535 1536 // Clone X+offset as it also folds into most addressing expressions. 1537 mstack.push(off, Visit); 1538 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1539 return true; 1540 } else if (clone_shift(off, this, mstack, address_visited)) { 1541 address_visited.test_set(m->_idx); // Flag as address_visited 1542 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1543 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1544 return true; 1545 } 1546 return false; 1547 } 1548 1549 void Compile::reshape_address(AddPNode* addp) { 1550 } 1551 1552 // Helper methods for MachSpillCopyNode::implementation(). 1553 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1554 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1555 // In the 64-bit VM size calculation is very complex, so instructions are 1556 // emitted into a scratch buffer to measure their size. 1557 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1558 assert(ireg == Op_VecS || // 32bit vector 1559 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1560 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1561 "no non-adjacent vector moves" ); 1562 if (cbuf) { 1563 MacroAssembler _masm(cbuf); 1564 int offset = __ offset(); 1565 switch (ireg) { 1566 case Op_VecS: // copy whole register 1567 case Op_VecD: 1568 case Op_VecX: 1569 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1570 break; 1571 case Op_VecY: 1572 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1573 break; 1574 case Op_VecZ: 1575 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1576 break; 1577 default: 1578 ShouldNotReachHere(); 1579 } 1580 int size = __ offset() - offset; 1581 #ifdef ASSERT 1582 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1583 assert(!do_size || size == 4, "incorrect size calculation"); 1584 #endif 1585 return size; 1586 #ifndef PRODUCT 1587 } else if (!do_size) { 1588 switch (ireg) { 1589 case Op_VecS: 1590 case Op_VecD: 1591 case Op_VecX: 1592 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1593 break; 1594 case Op_VecY: 1595 case Op_VecZ: 1596 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1597 break; 1598 default: 1599 ShouldNotReachHere(); 1600 } 1601 #endif 1602 } 1603 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1604 return (UseAVX > 2) ? 6 : 4; 1605 } 1606 1607 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1608 int stack_offset, int reg, uint ireg, outputStream* st) { 1609 // In the 64-bit VM size calculation is very complex, so instructions are 1610 // emitted into a scratch buffer to measure their size. 1611 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1612 if (cbuf) { 1613 MacroAssembler _masm(cbuf); 1614 int offset = __ offset(); 1615 if (is_load) { 1616 switch (ireg) { 1617 case Op_VecS: 1618 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1619 break; 1620 case Op_VecD: 1621 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1622 break; 1623 case Op_VecX: 1624 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1625 break; 1626 case Op_VecY: 1627 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1628 break; 1629 case Op_VecZ: 1630 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1631 break; 1632 default: 1633 ShouldNotReachHere(); 1634 } 1635 } else { // store 1636 switch (ireg) { 1637 case Op_VecS: 1638 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1639 break; 1640 case Op_VecD: 1641 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1642 break; 1643 case Op_VecX: 1644 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1645 break; 1646 case Op_VecY: 1647 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1648 break; 1649 case Op_VecZ: 1650 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1651 break; 1652 default: 1653 ShouldNotReachHere(); 1654 } 1655 } 1656 int size = __ offset() - offset; 1657 #ifdef ASSERT 1658 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1659 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
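// Sanity example: a 16-byte load from [rsp + 0x40] is movdqu xmm, [rsp+0x40],
// F3 0F 6F 44 24 40, i.e. 5 bytes of prefix/opcode/modrm/sib plus a 1-byte
// displacement (0x40 < 0x80), matching size == 5 + offset_size below.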
1660 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1661 #endif 1662 return size; 1663 #ifndef PRODUCT 1664 } else if (!do_size) { 1665 if (is_load) { 1666 switch (ireg) { 1667 case Op_VecS: 1668 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1669 break; 1670 case Op_VecD: 1671 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1672 break; 1673 case Op_VecX: 1674 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1675 break; 1676 case Op_VecY: 1677 case Op_VecZ: 1678 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1679 break; 1680 default: 1681 ShouldNotReachHere(); 1682 } 1683 } else { // store 1684 switch (ireg) { 1685 case Op_VecS: 1686 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1687 break; 1688 case Op_VecD: 1689 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1690 break; 1691 case Op_VecX: 1692 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1693 break; 1694 case Op_VecY: 1695 case Op_VecZ: 1696 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1697 break; 1698 default: 1699 ShouldNotReachHere(); 1700 } 1701 } 1702 #endif 1703 } 1704 bool is_single_byte = false; 1705 int vec_len = 0; 1706 if ((UseAVX > 2) && (stack_offset != 0)) { 1707 int tuple_type = Assembler::EVEX_FVM; 1708 int input_size = Assembler::EVEX_32bit; 1709 switch (ireg) { 1710 case Op_VecS: 1711 tuple_type = Assembler::EVEX_T1S; 1712 break; 1713 case Op_VecD: 1714 tuple_type = Assembler::EVEX_T1S; 1715 input_size = Assembler::EVEX_64bit; 1716 break; 1717 case Op_VecX: 1718 break; 1719 case Op_VecY: 1720 vec_len = 1; 1721 break; 1722 case Op_VecZ: 1723 vec_len = 2; 1724 break; 1725 } 1726 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 1727 } 1728 int offset_size = 0; 1729 int size = 5; 1730 if (UseAVX > 2) { 1731 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 1732 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1733 size += 2; // Need an additional two bytes for EVEX encoding 1734 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 1735 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1736 } else { 1737 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1738 size += 2; // Need an additional two bytes for EVEX encoding 1739 } 1740 } else { 1741 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1742 } 1743 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1744 return size+offset_size; 1745 } 1746 1747 static inline jint replicate4_imm(int con, int width) { 1748 // Load a constant of "width" (in bytes) and replicate it to fill 32 bits. 1749 assert(width == 1 || width == 2, "only byte or short types here"); 1750 int bit_width = width * 8; 1751 jint val = con; 1752 val &= (1 << bit_width) - 1; // mask off sign bits 1753 while(bit_width < 32) { 1754 val |= (val << bit_width); 1755 bit_width <<= 1; 1756 } 1757 return val; 1758 } 1759 1760 static inline jlong replicate8_imm(int con, int width) { 1761 // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
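// Worked examples: replicate4_imm(0x0041, 2) == 0x00410041 and
// replicate8_imm(0xAB, 1) == 0xABABABABABABABAB -- the masked constant is
// OR-ed with shifted copies of itself until the full word is filled.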
1762 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1763 int bit_width = width * 8; 1764 jlong val = con; 1765 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1766 while(bit_width < 64) { 1767 val |= (val << bit_width); 1768 bit_width <<= 1; 1769 } 1770 return val; 1771 } 1772 1773 #ifndef PRODUCT 1774 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1775 st->print("nop \t# %d bytes pad for loops and calls", _count); 1776 } 1777 #endif 1778 1779 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1780 MacroAssembler _masm(&cbuf); 1781 __ nop(_count); 1782 } 1783 1784 uint MachNopNode::size(PhaseRegAlloc*) const { 1785 return _count; 1786 } 1787 1788 #ifndef PRODUCT 1789 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1790 st->print("# breakpoint"); 1791 } 1792 #endif 1793 1794 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1795 MacroAssembler _masm(&cbuf); 1796 __ int3(); 1797 } 1798 1799 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1800 return MachNode::size(ra_); 1801 } 1802 1803 %} 1804 1805 encode %{ 1806 1807 enc_class call_epilog %{ 1808 if (VerifyStackAtCalls) { 1809 // Check that stack depth is unchanged: find majik cookie on stack 1810 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1811 MacroAssembler _masm(&cbuf); 1812 Label L; 1813 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1814 __ jccb(Assembler::equal, L); 1815 // Die if stack mismatch 1816 __ int3(); 1817 __ bind(L); 1818 } 1819 %} 1820 1821 %} 1822 1823 1824 //----------OPERANDS----------------------------------------------------------- 1825 // Operand definitions must precede instruction definitions for correct parsing 1826 // in the ADLC because operands constitute user defined types which are used in 1827 // instruction definitions. 1828 1829 // This operand applies only to EVEX, so there is only one version. 1830 operand vecZ() %{ 1831 constraint(ALLOC_IN_RC(vectorz_reg)); 1832 match(VecZ); 1833 1834 format %{ %} 1835 interface(REG_INTER); 1836 %} 1837 1838 operand rxmm0() %{ 1839 constraint(ALLOC_IN_RC(xmm0_reg)); match(VecX); 1840 predicate((UseSSE > 0) && (UseAVX == 0)); format %{ %} interface(REG_INTER); 1841 %} 1842 1843 // Comparison Code for FP conditional move 1844 operand cmpOp_vcmppd() %{ 1845 match(Bool); 1846 1847 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 1848 n->as_Bool()->_test._test != BoolTest::no_overflow); 1849 format %{ "" %} 1850 interface(COND_INTER) %{ 1851 equal (0x0, "eq"); 1852 less (0x1, "lt"); 1853 less_equal (0x2, "le"); 1854 not_equal (0xC, "ne"); 1855 greater_equal(0xD, "ge"); 1856 greater (0xE, "gt"); 1857 // TODO: adlc cannot compile without the next two lines; it breaks with error: 1858 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 1859 // equal' for overflow.
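// (The 0x20/0x21 encodings below lie outside the 5-bit vcmppd predicate
// range, 0x00-0x1F, and are never emitted; they exist only to satisfy the
// adlc parser, as the comments above explain.)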
1860 overflow (0x20, "o"); // not really supported by the instruction 1861 no_overflow (0x21, "no"); // not really supported by the instruction 1862 %} 1863 %} 1864 1865 1866 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1867 1868 // ============================================================================ 1869 1870 instruct ShouldNotReachHere() %{ 1871 match(Halt); 1872 format %{ "ud2\t# ShouldNotReachHere" %} 1873 ins_encode %{ 1874 __ ud2(); 1875 %} 1876 ins_pipe(pipe_slow); 1877 %} 1878 1879 // =================================EVEX special=============================== 1880 1881 instruct setMask(rRegI dst, rRegI src) %{ 1882 predicate(Matcher::has_predicated_vectors()); 1883 match(Set dst (SetVectMaskI src)); 1884 effect(TEMP dst); 1885 format %{ "setvectmask $dst, $src" %} 1886 ins_encode %{ 1887 __ setvectmask($dst$$Register, $src$$Register); 1888 %} 1889 ins_pipe(pipe_slow); 1890 %} 1891 1892 // ============================================================================ 1893 1894 instruct addF_reg(regF dst, regF src) %{ 1895 predicate((UseSSE>=1) && (UseAVX == 0)); 1896 match(Set dst (AddF dst src)); 1897 1898 format %{ "addss $dst, $src" %} 1899 ins_cost(150); 1900 ins_encode %{ 1901 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1902 %} 1903 ins_pipe(pipe_slow); 1904 %} 1905 1906 instruct addF_mem(regF dst, memory src) %{ 1907 predicate((UseSSE>=1) && (UseAVX == 0)); 1908 match(Set dst (AddF dst (LoadF src))); 1909 1910 format %{ "addss $dst, $src" %} 1911 ins_cost(150); 1912 ins_encode %{ 1913 __ addss($dst$$XMMRegister, $src$$Address); 1914 %} 1915 ins_pipe(pipe_slow); 1916 %} 1917 1918 instruct addF_imm(regF dst, immF con) %{ 1919 predicate((UseSSE>=1) && (UseAVX == 0)); 1920 match(Set dst (AddF dst con)); 1921 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1922 ins_cost(150); 1923 ins_encode %{ 1924 __ addss($dst$$XMMRegister, $constantaddress($con)); 1925 %} 1926 ins_pipe(pipe_slow); 1927 %} 1928 1929 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1930 predicate(UseAVX > 0); 1931 match(Set dst (AddF src1 src2)); 1932 1933 format %{ "vaddss $dst, $src1, $src2" %} 1934 ins_cost(150); 1935 ins_encode %{ 1936 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1937 %} 1938 ins_pipe(pipe_slow); 1939 %} 1940 1941 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1942 predicate(UseAVX > 0); 1943 match(Set dst (AddF src1 (LoadF src2))); 1944 1945 format %{ "vaddss $dst, $src1, $src2" %} 1946 ins_cost(150); 1947 ins_encode %{ 1948 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1949 %} 1950 ins_pipe(pipe_slow); 1951 %} 1952 1953 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1954 predicate(UseAVX > 0); 1955 match(Set dst (AddF src con)); 1956 1957 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1958 ins_cost(150); 1959 ins_encode %{ 1960 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1961 %} 1962 ins_pipe(pipe_slow); 1963 %} 1964 1965 instruct addD_reg(regD dst, regD src) %{ 1966 predicate((UseSSE>=2) && (UseAVX == 0)); 1967 match(Set dst (AddD dst src)); 1968 1969 format %{ "addsd $dst, $src" %} 1970 ins_cost(150); 1971 ins_encode %{ 1972 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1973 %} 1974 ins_pipe(pipe_slow); 1975 %} 1976 1977 instruct addD_mem(regD dst, memory src) %{ 1978 predicate((UseSSE>=2) && (UseAVX == 0)); 1979 match(Set dst (AddD dst (LoadD src))); 1980 1981 
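// The (LoadD src) form lets the matcher fold the memory operand directly
// into addsd, avoiding a separate load instruction.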
format %{ "addsd $dst, $src" %} 1982 ins_cost(150); 1983 ins_encode %{ 1984 __ addsd($dst$$XMMRegister, $src$$Address); 1985 %} 1986 ins_pipe(pipe_slow); 1987 %} 1988 1989 instruct addD_imm(regD dst, immD con) %{ 1990 predicate((UseSSE>=2) && (UseAVX == 0)); 1991 match(Set dst (AddD dst con)); 1992 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1993 ins_cost(150); 1994 ins_encode %{ 1995 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1996 %} 1997 ins_pipe(pipe_slow); 1998 %} 1999 2000 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2001 predicate(UseAVX > 0); 2002 match(Set dst (AddD src1 src2)); 2003 2004 format %{ "vaddsd $dst, $src1, $src2" %} 2005 ins_cost(150); 2006 ins_encode %{ 2007 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2008 %} 2009 ins_pipe(pipe_slow); 2010 %} 2011 2012 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2013 predicate(UseAVX > 0); 2014 match(Set dst (AddD src1 (LoadD src2))); 2015 2016 format %{ "vaddsd $dst, $src1, $src2" %} 2017 ins_cost(150); 2018 ins_encode %{ 2019 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2020 %} 2021 ins_pipe(pipe_slow); 2022 %} 2023 2024 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2025 predicate(UseAVX > 0); 2026 match(Set dst (AddD src con)); 2027 2028 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2029 ins_cost(150); 2030 ins_encode %{ 2031 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2032 %} 2033 ins_pipe(pipe_slow); 2034 %} 2035 2036 instruct subF_reg(regF dst, regF src) %{ 2037 predicate((UseSSE>=1) && (UseAVX == 0)); 2038 match(Set dst (SubF dst src)); 2039 2040 format %{ "subss $dst, $src" %} 2041 ins_cost(150); 2042 ins_encode %{ 2043 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2044 %} 2045 ins_pipe(pipe_slow); 2046 %} 2047 2048 instruct subF_mem(regF dst, memory src) %{ 2049 predicate((UseSSE>=1) && (UseAVX == 0)); 2050 match(Set dst (SubF dst (LoadF src))); 2051 2052 format %{ "subss $dst, $src" %} 2053 ins_cost(150); 2054 ins_encode %{ 2055 __ subss($dst$$XMMRegister, $src$$Address); 2056 %} 2057 ins_pipe(pipe_slow); 2058 %} 2059 2060 instruct subF_imm(regF dst, immF con) %{ 2061 predicate((UseSSE>=1) && (UseAVX == 0)); 2062 match(Set dst (SubF dst con)); 2063 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2064 ins_cost(150); 2065 ins_encode %{ 2066 __ subss($dst$$XMMRegister, $constantaddress($con)); 2067 %} 2068 ins_pipe(pipe_slow); 2069 %} 2070 2071 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2072 predicate(UseAVX > 0); 2073 match(Set dst (SubF src1 src2)); 2074 2075 format %{ "vsubss $dst, $src1, $src2" %} 2076 ins_cost(150); 2077 ins_encode %{ 2078 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2079 %} 2080 ins_pipe(pipe_slow); 2081 %} 2082 2083 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2084 predicate(UseAVX > 0); 2085 match(Set dst (SubF src1 (LoadF src2))); 2086 2087 format %{ "vsubss $dst, $src1, $src2" %} 2088 ins_cost(150); 2089 ins_encode %{ 2090 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2091 %} 2092 ins_pipe(pipe_slow); 2093 %} 2094 2095 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2096 predicate(UseAVX > 0); 2097 match(Set dst (SubF src con)); 2098 2099 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2100 ins_cost(150); 2101 ins_encode %{ 2102 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2103 %} 2104 ins_pipe(pipe_slow); 2105 %} 2106 2107 instruct subD_reg(regD dst, regD src) %{ 2108 predicate((UseSSE>=2) && (UseAVX == 0)); 2109 match(Set dst (SubD dst src)); 2110 2111 format %{ "subsd $dst, $src" %} 2112 ins_cost(150); 2113 ins_encode %{ 2114 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2115 %} 2116 ins_pipe(pipe_slow); 2117 %} 2118 2119 instruct subD_mem(regD dst, memory src) %{ 2120 predicate((UseSSE>=2) && (UseAVX == 0)); 2121 match(Set dst (SubD dst (LoadD src))); 2122 2123 format %{ "subsd $dst, $src" %} 2124 ins_cost(150); 2125 ins_encode %{ 2126 __ subsd($dst$$XMMRegister, $src$$Address); 2127 %} 2128 ins_pipe(pipe_slow); 2129 %} 2130 2131 instruct subD_imm(regD dst, immD con) %{ 2132 predicate((UseSSE>=2) && (UseAVX == 0)); 2133 match(Set dst (SubD dst con)); 2134 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2135 ins_cost(150); 2136 ins_encode %{ 2137 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2138 %} 2139 ins_pipe(pipe_slow); 2140 %} 2141 2142 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2143 predicate(UseAVX > 0); 2144 match(Set dst (SubD src1 src2)); 2145 2146 format %{ "vsubsd $dst, $src1, $src2" %} 2147 ins_cost(150); 2148 ins_encode %{ 2149 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2150 %} 2151 ins_pipe(pipe_slow); 2152 %} 2153 2154 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2155 predicate(UseAVX > 0); 2156 match(Set dst (SubD src1 (LoadD src2))); 2157 2158 format %{ "vsubsd $dst, $src1, $src2" %} 2159 ins_cost(150); 2160 ins_encode %{ 2161 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2162 %} 2163 ins_pipe(pipe_slow); 2164 %} 2165 2166 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2167 predicate(UseAVX > 0); 2168 match(Set dst (SubD src con)); 2169 2170 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2171 ins_cost(150); 2172 ins_encode %{ 2173 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2174 %} 2175 ins_pipe(pipe_slow); 2176 %} 2177 2178 instruct mulF_reg(regF dst, regF src) %{ 2179 predicate((UseSSE>=1) && (UseAVX == 0)); 2180 match(Set dst (MulF dst src)); 2181 2182 format %{ "mulss $dst, $src" %} 2183 ins_cost(150); 2184 ins_encode %{ 2185 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2186 %} 2187 ins_pipe(pipe_slow); 2188 %} 2189 2190 instruct mulF_mem(regF dst, memory src) %{ 2191 predicate((UseSSE>=1) && (UseAVX == 0)); 2192 match(Set dst (MulF dst (LoadF src))); 2193 2194 format %{ "mulss $dst, $src" %} 2195 ins_cost(150); 2196 ins_encode %{ 2197 __ mulss($dst$$XMMRegister, $src$$Address); 2198 %} 2199 ins_pipe(pipe_slow); 2200 %} 2201 2202 instruct mulF_imm(regF dst, immF con) %{ 2203 predicate((UseSSE>=1) && (UseAVX == 0)); 2204 match(Set dst (MulF dst con)); 2205 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2206 ins_cost(150); 2207 ins_encode %{ 2208 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2209 %} 2210 ins_pipe(pipe_slow); 2211 %} 2212 2213 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2214 predicate(UseAVX > 0); 2215 match(Set dst (MulF src1 src2)); 2216 2217 format %{ "vmulss $dst, $src1, $src2" %} 2218 ins_cost(150); 2219 ins_encode %{ 2220 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2221 %} 2222 ins_pipe(pipe_slow); 2223 %} 2224 2225 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2226 predicate(UseAVX > 0); 2227 match(Set dst (MulF src1 (LoadF src2))); 2228 2229 format %{ "vmulss $dst, $src1, $src2" %} 2230 ins_cost(150); 2231 ins_encode %{ 2232 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2233 %} 2234 ins_pipe(pipe_slow); 2235 %} 2236 2237 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2238 predicate(UseAVX > 0); 2239 match(Set dst (MulF src con)); 2240 2241 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2242 ins_cost(150); 2243 ins_encode %{ 2244 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2245 %} 2246 ins_pipe(pipe_slow); 2247 %} 2248 2249 instruct mulD_reg(regD dst, regD src) %{ 2250 predicate((UseSSE>=2) && (UseAVX == 0)); 2251 match(Set dst (MulD dst src)); 2252 2253 format %{ "mulsd $dst, $src" %} 2254 ins_cost(150); 2255 ins_encode %{ 2256 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2257 %} 2258 ins_pipe(pipe_slow); 2259 %} 2260 2261 instruct mulD_mem(regD dst, memory src) %{ 2262 predicate((UseSSE>=2) && (UseAVX == 0)); 2263 match(Set dst (MulD dst (LoadD src))); 2264 2265 format %{ "mulsd $dst, $src" %} 2266 ins_cost(150); 2267 ins_encode %{ 2268 __ mulsd($dst$$XMMRegister, $src$$Address); 2269 %} 2270 ins_pipe(pipe_slow); 2271 %} 2272 2273 instruct mulD_imm(regD dst, immD con) %{ 2274 predicate((UseSSE>=2) && (UseAVX == 0)); 2275 match(Set dst (MulD dst con)); 2276 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2277 ins_cost(150); 2278 ins_encode %{ 2279 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2280 %} 2281 ins_pipe(pipe_slow); 2282 %} 2283 2284 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2285 predicate(UseAVX > 0); 2286 match(Set dst (MulD src1 src2)); 2287 2288 format %{ "vmulsd $dst, $src1, $src2" %} 2289 ins_cost(150); 2290 ins_encode %{ 2291 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2292 %} 2293 ins_pipe(pipe_slow); 2294 %} 2295 2296 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2297 predicate(UseAVX > 0); 2298 match(Set dst (MulD src1 (LoadD src2))); 2299 2300 format %{ "vmulsd $dst, $src1, $src2" %} 2301 ins_cost(150); 2302 ins_encode %{ 2303 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2304 %} 2305 ins_pipe(pipe_slow); 2306 %} 2307 2308 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2309 predicate(UseAVX > 0); 2310 match(Set dst (MulD src con)); 2311 2312 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2313 ins_cost(150); 2314 ins_encode %{ 2315 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2316 %} 2317 ins_pipe(pipe_slow); 2318 %} 2319 2320 instruct divF_reg(regF dst, regF src) %{ 2321 predicate((UseSSE>=1) && (UseAVX == 0)); 2322 match(Set dst (DivF dst src)); 2323 2324 format %{ "divss $dst, $src" %} 2325 ins_cost(150); 2326 ins_encode %{ 2327 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2328 %} 2329 ins_pipe(pipe_slow); 2330 %} 2331 2332 instruct divF_mem(regF dst, memory src) %{ 2333 predicate((UseSSE>=1) && (UseAVX == 0)); 2334 match(Set dst (DivF dst (LoadF src))); 2335 2336 format %{ "divss $dst, $src" %} 2337 ins_cost(150); 2338 ins_encode %{ 2339 __ divss($dst$$XMMRegister, $src$$Address); 2340 %} 2341 ins_pipe(pipe_slow); 2342 %} 2343 2344 instruct divF_imm(regF dst, immF con) %{ 2345 predicate((UseSSE>=1) && (UseAVX == 0)); 2346 match(Set dst (DivF dst con)); 2347 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2348 ins_cost(150); 2349 ins_encode %{ 2350 __ divss($dst$$XMMRegister, $constantaddress($con)); 2351 %} 2352 ins_pipe(pipe_slow); 2353 %} 2354 2355 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2356 predicate(UseAVX > 0); 2357 match(Set dst (DivF src1 src2)); 2358 2359 format %{ "vdivss $dst, $src1, $src2" %} 2360 ins_cost(150); 2361 ins_encode %{ 2362 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2363 %} 2364 ins_pipe(pipe_slow); 2365 %} 2366 2367 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2368 predicate(UseAVX > 0); 2369 match(Set dst (DivF src1 (LoadF src2))); 2370 2371 format %{ "vdivss $dst, $src1, $src2" %} 2372 ins_cost(150); 2373 ins_encode %{ 2374 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2375 %} 2376 ins_pipe(pipe_slow); 2377 %} 2378 2379 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2380 predicate(UseAVX > 0); 2381 match(Set dst (DivF src con)); 2382 2383 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2384 ins_cost(150); 2385 ins_encode %{ 2386 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2387 %} 2388 ins_pipe(pipe_slow); 2389 %} 2390 2391 instruct divD_reg(regD dst, regD src) %{ 2392 predicate((UseSSE>=2) && (UseAVX == 0)); 2393 match(Set dst (DivD dst src)); 2394 2395 format %{ "divsd $dst, $src" %} 2396 ins_cost(150); 2397 ins_encode %{ 2398 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2399 %} 2400 ins_pipe(pipe_slow); 2401 %} 2402 2403 instruct divD_mem(regD dst, memory src) %{ 2404 predicate((UseSSE>=2) && (UseAVX == 0)); 2405 match(Set dst (DivD dst (LoadD src))); 2406 2407 format %{ "divsd $dst, $src" %} 2408 ins_cost(150); 2409 ins_encode %{ 2410 __ divsd($dst$$XMMRegister, $src$$Address); 2411 %} 2412 ins_pipe(pipe_slow); 2413 %} 2414 2415 instruct divD_imm(regD dst, immD con) %{ 2416 predicate((UseSSE>=2) && (UseAVX == 0)); 2417 match(Set dst (DivD dst con)); 2418 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2419 ins_cost(150); 2420 ins_encode %{ 2421 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2422 %} 2423 ins_pipe(pipe_slow); 2424 %} 2425 2426 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2427 predicate(UseAVX > 0); 2428 match(Set dst (DivD src1 src2)); 2429 2430 format %{ "vdivsd $dst, $src1, $src2" %} 2431 ins_cost(150); 2432 ins_encode %{ 2433 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2434 %} 2435 ins_pipe(pipe_slow); 2436 %} 2437 2438 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2439 predicate(UseAVX > 0); 2440 match(Set dst (DivD src1 (LoadD src2))); 2441 2442 format %{ "vdivsd $dst, $src1, $src2" %} 2443 ins_cost(150); 2444 ins_encode %{ 2445 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2446 %} 2447 ins_pipe(pipe_slow); 2448 %} 2449 2450 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2451 predicate(UseAVX > 0); 2452 match(Set dst (DivD src con)); 2453 2454 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2455 ins_cost(150); 2456 ins_encode %{ 2457 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2458 %} 2459 ins_pipe(pipe_slow); 2460 %} 2461 2462 instruct absF_reg(regF dst) %{ 2463 predicate((UseSSE>=1) && (UseAVX == 0)); 2464 match(Set dst (AbsF dst)); 2465 ins_cost(150); 2466 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%} 2467 ins_encode %{ 2468 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2469 %} 2470 ins_pipe(pipe_slow); 2471 %} 2472 2473 instruct absF_reg_reg(regF dst, regF src) %{ 2474 predicate(VM_Version::supports_avxonly()); 2475 match(Set dst (AbsF src)); 2476 ins_cost(150); 2477 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2478 ins_encode %{ 2479 int vector_len = 0; 2480 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2481 ExternalAddress(float_signmask()), vector_len); 2482 %} 2483 ins_pipe(pipe_slow); 2484 %} 2485 2486 #ifdef _LP64 2487 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2488 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2489 match(Set dst (AbsF src)); 2490 ins_cost(150); 2491 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2492 ins_encode %{ 2493 int vector_len = 0; 2494 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2495 ExternalAddress(float_signmask()), vector_len); 2496 %} 2497 ins_pipe(pipe_slow); 2498 %} 2499 2500 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ 2501 predicate(VM_Version::supports_avx512novl()); 2502 match(Set dst (AbsF src1)); 2503 effect(TEMP src2); 2504 ins_cost(150); 2505 format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} 2506 ins_encode %{ 2507 int vector_len = 0; 2508 __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2509 ExternalAddress(float_signmask()), vector_len); 2510 %} 2511 ins_pipe(pipe_slow); 2512 %} 2513 #else // _LP64 2514 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2515 predicate(UseAVX > 2); 2516 match(Set dst (AbsF src)); 2517 ins_cost(150); 2518 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2519 ins_encode %{ 2520 int vector_len = 0; 2521 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2522 ExternalAddress(float_signmask()), vector_len); 2523 %} 2524 ins_pipe(pipe_slow); 2525 %} 2526 #endif 2527 2528 instruct absD_reg(regD dst) %{ 2529 predicate((UseSSE>=2) && (UseAVX == 0)); 2530 match(Set dst (AbsD dst)); 2531 ins_cost(150); 2532 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2533 "# abs double by sign masking" %} 2534 ins_encode %{ 2535 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2536 %} 2537 ins_pipe(pipe_slow); 2538 %} 2539 2540 instruct absD_reg_reg(regD dst, regD src) %{ 2541 predicate(VM_Version::supports_avxonly()); 2542 match(Set dst (AbsD src)); 2543 ins_cost(150); 2544 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2545 "# abs double by sign masking" %} 2546 ins_encode %{ 2547 int vector_len = 0; 2548 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2549 ExternalAddress(double_signmask()), vector_len); 2550 %} 2551 ins_pipe(pipe_slow); 2552 %} 2553 2554 #ifdef _LP64 2555 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2556 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2557 match(Set dst (AbsD src)); 2558 ins_cost(150); 2559 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2560 "# abs double by sign masking" %} 2561 ins_encode %{ 2562 int vector_len = 0; 2563 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2564 ExternalAddress(double_signmask()), vector_len); 2565 %} 2566 ins_pipe(pipe_slow); 2567 %} 2568 2569 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ 2570 predicate(VM_Version::supports_avx512novl()); 2571 match(Set dst (AbsD src1)); 2572 effect(TEMP src2); 2573 ins_cost(150); 2574 format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double
by sign masking" %} 2575 ins_encode %{ 2576 int vector_len = 0; 2577 __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2578 ExternalAddress(double_signmask()), vector_len); 2579 %} 2580 ins_pipe(pipe_slow); 2581 %} 2582 #else // _LP64 2583 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2584 predicate(UseAVX > 2); 2585 match(Set dst (AbsD src)); 2586 ins_cost(150); 2587 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2588 "# abs double by sign masking" %} 2589 ins_encode %{ 2590 int vector_len = 0; 2591 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2592 ExternalAddress(double_signmask()), vector_len); 2593 %} 2594 ins_pipe(pipe_slow); 2595 %} 2596 #endif 2597 2598 instruct negF_reg(regF dst) %{ 2599 predicate((UseSSE>=1) && (UseAVX == 0)); 2600 match(Set dst (NegF dst)); 2601 ins_cost(150); 2602 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2603 ins_encode %{ 2604 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2605 %} 2606 ins_pipe(pipe_slow); 2607 %} 2608 2609 instruct negF_reg_reg(regF dst, regF src) %{ 2610 predicate(UseAVX > 0); 2611 match(Set dst (NegF src)); 2612 ins_cost(150); 2613 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2614 ins_encode %{ 2615 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2616 ExternalAddress(float_signflip())); 2617 %} 2618 ins_pipe(pipe_slow); 2619 %} 2620 2621 instruct negD_reg(regD dst) %{ 2622 predicate((UseSSE>=2) && (UseAVX == 0)); 2623 match(Set dst (NegD dst)); 2624 ins_cost(150); 2625 format %{ "xorpd $dst, [0x8000000000000000]\t" 2626 "# neg double by sign flipping" %} 2627 ins_encode %{ 2628 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2629 %} 2630 ins_pipe(pipe_slow); 2631 %} 2632 2633 instruct negD_reg_reg(regD dst, regD src) %{ 2634 predicate(UseAVX > 0); 2635 match(Set dst (NegD src)); 2636 ins_cost(150); 2637 format %{ "vnegatess $dst, $src, [0x8000000000000000]\t" 2638 "# neg double by sign flipping" %} 2639 ins_encode %{ 2640 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2641 ExternalAddress(double_signflip())); 2642 %} 2643 ins_pipe(pipe_slow); 2644 %} 2645 2646 instruct sqrtF_reg(regF dst, regF src) %{ 2647 predicate(UseSSE>=1); 2648 match(Set dst (SqrtF src)); 2649 2650 format %{ "sqrtss $dst, $src" %} 2651 ins_cost(150); 2652 ins_encode %{ 2653 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2654 %} 2655 ins_pipe(pipe_slow); 2656 %} 2657 2658 instruct sqrtF_mem(regF dst, memory src) %{ 2659 predicate(UseSSE>=1); 2660 match(Set dst (SqrtF (LoadF src))); 2661 2662 format %{ "sqrtss $dst, $src" %} 2663 ins_cost(150); 2664 ins_encode %{ 2665 __ sqrtss($dst$$XMMRegister, $src$$Address); 2666 %} 2667 ins_pipe(pipe_slow); 2668 %} 2669 2670 instruct sqrtF_imm(regF dst, immF con) %{ 2671 predicate(UseSSE>=1); 2672 match(Set dst (SqrtF con)); 2673 2674 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2675 ins_cost(150); 2676 ins_encode %{ 2677 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2678 %} 2679 ins_pipe(pipe_slow); 2680 %} 2681 2682 instruct sqrtD_reg(regD dst, regD src) %{ 2683 predicate(UseSSE>=2); 2684 match(Set dst (SqrtD src)); 2685 2686 format %{ "sqrtsd $dst, $src" %} 2687 ins_cost(150); 2688 ins_encode %{ 2689 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2690 %} 2691 ins_pipe(pipe_slow); 2692 %} 2693 2694 instruct sqrtD_mem(regD dst, memory src) %{ 2695 predicate(UseSSE>=2); 2696 match(Set dst (SqrtD (LoadD src))); 2697 2698 format %{ "sqrtsd $dst, 
$src" %} 2699 ins_cost(150); 2700 ins_encode %{ 2701 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2702 %} 2703 ins_pipe(pipe_slow); 2704 %} 2705 2706 instruct sqrtD_imm(regD dst, immD con) %{ 2707 predicate(UseSSE>=2); 2708 match(Set dst (SqrtD con)); 2709 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2710 ins_cost(150); 2711 ins_encode %{ 2712 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2713 %} 2714 ins_pipe(pipe_slow); 2715 %} 2716 2717 instruct onspinwait() %{ 2718 match(OnSpinWait); 2719 ins_cost(200); 2720 2721 format %{ 2722 $$template 2723 if (os::is_MP()) { 2724 $$emit$$"pause\t! membar_onspinwait" 2725 } else { 2726 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2727 } 2728 %} 2729 ins_encode %{ 2730 __ pause(); 2731 %} 2732 ins_pipe(pipe_slow); 2733 %} 2734 2735 // a * b + c 2736 instruct fmaD_reg(regD a, regD b, regD c) %{ 2737 predicate(UseFMA); 2738 match(Set c (FmaD c (Binary a b))); 2739 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2740 ins_cost(150); 2741 ins_encode %{ 2742 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2743 %} 2744 ins_pipe( pipe_slow ); 2745 %} 2746 2747 // a * b + c 2748 instruct fmaF_reg(regF a, regF b, regF c) %{ 2749 predicate(UseFMA); 2750 match(Set c (FmaF c (Binary a b))); 2751 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2752 ins_cost(150); 2753 ins_encode %{ 2754 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2755 %} 2756 ins_pipe( pipe_slow ); 2757 %} 2758 2759 // ====================VECTOR INSTRUCTIONS===================================== 2760 2761 instruct reinterpretS(vecS dst) %{ 2762 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2763 match(Set dst (VectorReinterpret dst)); 2764 ins_cost(125); 2765 format %{ " # reinterpret $dst" %} 2766 ins_encode %{ 2767 // empty 2768 %} 2769 ins_pipe( pipe_slow ); 2770 %} 2771 2772 instruct reinterpretS2D(vecD dst, vecS src) %{ 2773 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2774 match(Set dst (VectorReinterpret src)); 2775 ins_cost(125); 2776 effect(TEMP dst); 2777 format %{ " # reinterpret $dst,$src" %} 2778 ins_encode %{ 2779 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2780 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2781 %} 2782 ins_pipe( pipe_slow ); 2783 %} 2784 2785 instruct reinterpretS2X(vecX dst, vecS src) %{ 2786 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2787 match(Set dst (VectorReinterpret src)); 2788 ins_cost(125); 2789 effect(TEMP dst); 2790 format %{ " # reinterpret $dst,$src" %} 2791 ins_encode %{ 2792 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2793 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2794 %} 2795 ins_pipe( pipe_slow ); 2796 %} 2797 2798 instruct reinterpretS2Y(vecY dst, vecS src) %{ 2799 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2800 match(Set dst (VectorReinterpret src)); 2801 ins_cost(125); 2802 effect(TEMP dst); 2803 format %{ " # reinterpret $dst,$src" %} 2804 ins_encode %{ 2805 int vector_len = 1; 2806 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2807 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2808 %} 2809 ins_pipe( pipe_slow ); 2810 %} 2811 2812 instruct 
reinterpretS2Z(vecZ dst, vecS src) %{ 2813 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2814 match(Set dst (VectorReinterpret src)); 2815 ins_cost(125); 2816 effect(TEMP dst); 2817 format %{ " # reinterpret $dst,$src" %} 2818 ins_encode %{ 2819 int vector_len = 2; 2820 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2821 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2822 %} 2823 ins_pipe( pipe_slow ); 2824 %} 2825 2826 instruct reinterpretD2S(vecS dst, vecD src) %{ 2827 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2828 match(Set dst (VectorReinterpret src)); 2829 ins_cost(125); 2830 format %{ " # reinterpret $dst,$src" %} 2831 ins_encode %{ 2832 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2833 %} 2834 ins_pipe( pipe_slow ); 2835 %} 2836 2837 instruct reinterpretD(vecD dst) %{ 2838 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2839 match(Set dst (VectorReinterpret dst)); 2840 ins_cost(125); 2841 format %{ " # reinterpret $dst" %} 2842 ins_encode %{ 2843 // empty 2844 %} 2845 ins_pipe( pipe_slow ); 2846 %} 2847 2848 instruct reinterpretD2X(vecX dst, vecD src) %{ 2849 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2850 match(Set dst (VectorReinterpret src)); 2851 ins_cost(125); 2852 effect(TEMP dst); 2853 format %{ " # reinterpret $dst,$src" %} 2854 ins_encode %{ 2855 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2856 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2857 %} 2858 ins_pipe( pipe_slow ); 2859 %} 2860 2861 instruct reinterpretD2Y(vecY dst, vecD src) %{ 2862 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2863 match(Set dst (VectorReinterpret src)); 2864 ins_cost(125); 2865 effect(TEMP dst); 2866 format %{ " # reinterpret $dst,$src" %} 2867 ins_encode %{ 2868 int vector_len = 1; 2869 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2870 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2871 %} 2872 ins_pipe( pipe_slow ); 2873 %} 2874 2875 instruct reinterpretD2Z(vecZ dst, vecD src) %{ 2876 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2877 match(Set dst (VectorReinterpret src)); 2878 ins_cost(125); 2879 effect(TEMP dst); 2880 format %{ " # reinterpret $dst,$src" %} 2881 ins_encode %{ 2882 int vector_len = 2; 2883 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2884 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2885 %} 2886 ins_pipe( pipe_slow ); 2887 %} 2888 2889 instruct reinterpretX2S(vecS dst, vecX src) %{ 2890 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 2891 match(Set dst (VectorReinterpret src)); 2892 ins_cost(125); 2893 format %{ " # reinterpret $dst,$src" %} 2894 ins_encode %{ 2895 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2896 %} 2897 ins_pipe( pipe_slow ); 2898 %} 2899 2900 instruct reinterpretX2D(vecD dst, vecX src) %{ 2901 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 2902 match(Set dst (VectorReinterpret src)); 2903 
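// Narrowing reinterpret: only the low 64 bits of $src are meaningful to the
// destination, so a plain register copy suffices and no zeroing is needed
// (unlike the widening variants above, which clear $dst first).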
ins_cost(125); 2904 format %{ " # reinterpret $dst,$src" %} 2905 ins_encode %{ 2906 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2907 %} 2908 ins_pipe( pipe_slow ); 2909 %} 2910 2911 instruct reinterpretX(vecX dst) %{ 2912 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 2913 match(Set dst (VectorReinterpret dst)); 2914 ins_cost(125); 2915 format %{ " # reinterpret $dst" %} 2916 ins_encode %{ 2917 // empty 2918 %} 2919 ins_pipe( pipe_slow ); 2920 %} 2921 2922 instruct reinterpretX2Y(vecY dst, vecX src) %{ 2923 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 2924 match(Set dst (VectorReinterpret src)); 2925 ins_cost(125); 2926 effect(TEMP dst); 2927 format %{ " # reinterpret $dst,$src" %} 2928 ins_encode %{ 2929 int vector_len = 1; 2930 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2931 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2932 %} 2933 ins_pipe( pipe_slow ); 2934 %} 2935 2936 instruct reinterpretX2Z(vecZ dst, vecX src) %{ 2937 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 2938 match(Set dst (VectorReinterpret src)); 2939 ins_cost(125); 2940 effect(TEMP dst); 2941 format %{ " # reinterpret $dst,$src" %} 2942 ins_encode %{ 2943 int vector_len = 2; 2944 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2945 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2946 %} 2947 ins_pipe( pipe_slow ); 2948 %} 2949 2950 instruct reinterpretY2S(vecS dst, vecY src) %{ 2951 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 2952 match(Set dst (VectorReinterpret src)); 2953 ins_cost(125); 2954 format %{ " # reinterpret $dst,$src" %} 2955 ins_encode %{ 2956 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2957 %} 2958 ins_pipe( pipe_slow ); 2959 %} 2960 2961 instruct reinterpretY2D(vecD dst, vecY src) %{ 2962 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 2963 match(Set dst (VectorReinterpret src)); 2964 ins_cost(125); 2965 format %{ " # reinterpret $dst,$src" %} 2966 ins_encode %{ 2967 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2968 %} 2969 ins_pipe( pipe_slow ); 2970 %} 2971 2972 instruct reinterpretY2X(vecX dst, vecY src) %{ 2973 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 2974 match(Set dst (VectorReinterpret src)); 2975 ins_cost(125); 2976 format %{ " # reinterpret $dst,$src" %} 2977 ins_encode %{ 2978 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2979 %} 2980 ins_pipe( pipe_slow ); 2981 %} 2982 2983 instruct reinterpretY(vecY dst) %{ 2984 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 2985 match(Set dst (VectorReinterpret dst)); 2986 ins_cost(125); 2987 format %{ " # reinterpret $dst" %} 2988 ins_encode %{ 2989 // empty 2990 %} 2991 ins_pipe( pipe_slow ); 2992 %} 2993 2994 instruct reinterpretY2Z(vecZ dst, vecY src) %{ 2995 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 2996 match(Set dst (VectorReinterpret src)); 2997 ins_cost(125); 2998 effect(TEMP dst); 2999 format %{ " # reinterpret 
$dst,$src" %} 3000 ins_encode %{ 3001 int vector_len = 2; 3002 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3003 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3004 %} 3005 ins_pipe( pipe_slow ); 3006 %} 3007 3008 instruct reinterpretZ2S(vecS dst, vecZ src) %{ 3009 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3010 match(Set dst (VectorReinterpret src)); 3011 ins_cost(125); 3012 format %{ " # reinterpret $dst,$src" %} 3013 ins_encode %{ 3014 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3015 %} 3016 ins_pipe( pipe_slow ); 3017 %} 3018 3019 instruct reinterpretZ2D(vecD dst, vecZ src) %{ 3020 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3021 match(Set dst (VectorReinterpret src)); 3022 ins_cost(125); 3023 format %{ " # reinterpret $dst,$src" %} 3024 ins_encode %{ 3025 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3026 %} 3027 ins_pipe( pipe_slow ); 3028 %} 3029 3030 instruct reinterpretZ2X(vecX dst, vecZ src) %{ 3031 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3032 match(Set dst (VectorReinterpret src)); 3033 ins_cost(125); 3034 format %{ " # reinterpret $dst,$src" %} 3035 ins_encode %{ 3036 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3037 %} 3038 ins_pipe( pipe_slow ); 3039 %} 3040 3041 instruct reinterpretZ2Y(vecY dst, vecZ src) %{ 3042 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3043 match(Set dst (VectorReinterpret src)); 3044 ins_cost(125); 3045 format %{ " # reinterpret $dst,$src" %} 3046 ins_encode %{ 3047 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3048 %} 3049 ins_pipe( pipe_slow ); 3050 %} 3051 3052 instruct reinterpretZ(vecZ dst) %{ 3053 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3054 match(Set dst (VectorReinterpret dst)); 3055 ins_cost(125); 3056 format %{ " # reinterpret $dst" %} 3057 ins_encode %{ 3058 // empty 3059 %} 3060 ins_pipe( pipe_slow ); 3061 %} 3062 3063 // ========== 3064 3065 // Load vectors (1 byte long) 3066 instruct loadV1(vecS dst, memory mem, rRegI tmp) %{ 3067 predicate(n->as_LoadVector()->memory_size() == 1); 3068 match(Set dst (LoadVector mem)); 3069 ins_cost(125); 3070 effect(TEMP tmp); 3071 format %{ "movzbl $tmp,$mem\n\t" 3072 "movd $dst,$tmp\t! load vector (1 byte)" %} 3073 ins_encode %{ 3074 __ movzbl($tmp$$Register, $mem$$Address); 3075 __ movdl($dst$$XMMRegister, $tmp$$Register); 3076 %} 3077 ins_pipe( pipe_slow ); 3078 %} 3079 3080 // Load vectors (2 bytes long) 3081 instruct loadV2(vecS dst, memory mem, rRegI tmp) %{ 3082 predicate(n->as_LoadVector()->memory_size() == 2); 3083 match(Set dst (LoadVector mem)); 3084 ins_cost(125); 3085 effect(TEMP tmp); 3086 format %{ "movzwl $tmp,$mem\n\t" 3087 "movd $dst,$tmp\t! load vector (2 bytes)" %} 3088 ins_encode %{ 3089 __ movzwl($tmp$$Register, $mem$$Address); 3090 __ movdl($dst$$XMMRegister, $tmp$$Register); 3091 %} 3092 ins_pipe( pipe_slow ); 3093 %} 3094 3095 // Load vectors (4 bytes long) 3096 instruct loadV4(vecS dst, memory mem) %{ 3097 predicate(n->as_LoadVector()->memory_size() == 4); 3098 match(Set dst (LoadVector mem)); 3099 ins_cost(125); 3100 format %{ "movd $dst,$mem\t! 
load vector (4 bytes)" %} 3101 ins_encode %{ 3102 __ movdl($dst$$XMMRegister, $mem$$Address); 3103 %} 3104 ins_pipe( pipe_slow ); 3105 %} 3106 3107 // Load vectors (8 bytes long) 3108 instruct loadV8(vecD dst, memory mem) %{ 3109 predicate(n->as_LoadVector()->memory_size() == 8); 3110 match(Set dst (LoadVector mem)); 3111 ins_cost(125); 3112 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 3113 ins_encode %{ 3114 __ movq($dst$$XMMRegister, $mem$$Address); 3115 %} 3116 ins_pipe( pipe_slow ); 3117 %} 3118 3119 // Load vectors (16 bytes long) 3120 instruct loadV16(vecX dst, memory mem) %{ 3121 predicate(n->as_LoadVector()->memory_size() == 16); 3122 match(Set dst (LoadVector mem)); 3123 ins_cost(125); 3124 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 3125 ins_encode %{ 3126 __ movdqu($dst$$XMMRegister, $mem$$Address); 3127 %} 3128 ins_pipe( pipe_slow ); 3129 %} 3130 3131 // Load vectors (32 bytes long) 3132 instruct loadV32(vecY dst, memory mem) %{ 3133 predicate(n->as_LoadVector()->memory_size() == 32); 3134 match(Set dst (LoadVector mem)); 3135 ins_cost(125); 3136 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 3137 ins_encode %{ 3138 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3139 %} 3140 ins_pipe( pipe_slow ); 3141 %} 3142 3143 // Load vectors (64 bytes long) 3144 instruct loadV64_dword(vecZ dst, memory mem) %{ 3145 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 3146 match(Set dst (LoadVector mem)); 3147 ins_cost(125); 3148 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 3149 ins_encode %{ 3150 int vector_len = 2; 3151 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3152 %} 3153 ins_pipe( pipe_slow ); 3154 %} 3155 3156 // Load vectors (64 bytes long) 3157 instruct loadV64_qword(vecZ dst, memory mem) %{ 3158 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 3159 match(Set dst (LoadVector mem)); 3160 ins_cost(125); 3161 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 3162 ins_encode %{ 3163 int vector_len = 2; 3164 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 3165 %} 3166 ins_pipe( pipe_slow ); 3167 %} 3168 3169 // Store vectors 3170 instruct storeV1(memory mem, vecS src, rRegI tmp) %{ 3171 predicate(n->as_StoreVector()->memory_size() == 1); 3172 match(Set mem (StoreVector mem src)); 3173 ins_cost(145); 3174 effect(TEMP tmp); 3175 format %{ "movd $tmp,$src\n\t" 3176 "movb $mem,$tmp\t! store vector (1 byte)" %} 3177 ins_encode %{ 3178 __ movdl($tmp$$Register, $src$$XMMRegister); 3179 __ movb($mem$$Address, $tmp$$Register); 3180 %} 3181 ins_pipe( pipe_slow ); 3182 %} 3183 3184 instruct storeV2(memory mem, vecS src, rRegI tmp) %{ 3185 predicate(n->as_StoreVector()->memory_size() == 2); 3186 match(Set mem (StoreVector mem src)); 3187 ins_cost(145); 3188 effect(TEMP tmp); 3189 format %{ "movd $tmp,$src\n\t" 3190 "movw $mem,$tmp\t! store vector (2 bytes)" %} 3191 ins_encode %{ 3192 __ movdl($tmp$$Register, $src$$XMMRegister); 3193 __ movw($mem$$Address, $tmp$$Register); 3194 %} 3195 ins_pipe( pipe_slow ); 3196 %} 3197 3198 instruct storeV4(memory mem, vecS src) %{ 3199 predicate(n->as_StoreVector()->memory_size() == 4); 3200 match(Set mem (StoreVector mem src)); 3201 ins_cost(145); 3202 format %{ "movd $mem,$src\t! 
store vector (4 bytes)" %} 3203 ins_encode %{ 3204 __ movdl($mem$$Address, $src$$XMMRegister); 3205 %} 3206 ins_pipe( pipe_slow ); 3207 %} 3208 3209 instruct storeV8(memory mem, vecD src) %{ 3210 predicate(n->as_StoreVector()->memory_size() == 8); 3211 match(Set mem (StoreVector mem src)); 3212 ins_cost(145); 3213 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 3214 ins_encode %{ 3215 __ movq($mem$$Address, $src$$XMMRegister); 3216 %} 3217 ins_pipe( pipe_slow ); 3218 %} 3219 3220 instruct storeV16(memory mem, vecX src) %{ 3221 predicate(n->as_StoreVector()->memory_size() == 16); 3222 match(Set mem (StoreVector mem src)); 3223 ins_cost(145); 3224 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3225 ins_encode %{ 3226 __ movdqu($mem$$Address, $src$$XMMRegister); 3227 %} 3228 ins_pipe( pipe_slow ); 3229 %} 3230 3231 instruct storeV32(memory mem, vecY src) %{ 3232 predicate(n->as_StoreVector()->memory_size() == 32); 3233 match(Set mem (StoreVector mem src)); 3234 ins_cost(145); 3235 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 3236 ins_encode %{ 3237 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3238 %} 3239 ins_pipe( pipe_slow ); 3240 %} 3241 3242 instruct storeV64_dword(memory mem, vecZ src) %{ 3243 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 3244 match(Set mem (StoreVector mem src)); 3245 ins_cost(145); 3246 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 3247 ins_encode %{ 3248 int vector_len = 2; 3249 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3250 %} 3251 ins_pipe( pipe_slow ); 3252 %} 3253 3254 instruct storeV64_qword(memory mem, vecZ src) %{ 3255 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 3256 match(Set mem (StoreVector mem src)); 3257 ins_cost(145); 3258 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 3259 ins_encode %{ 3260 int vector_len = 2; 3261 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 3262 %} 3263 ins_pipe( pipe_slow ); 3264 %} 3265 3266 // ====================LEGACY REPLICATE======================================= 3267 3268 instruct Repl4B_mem(vecS dst, memory mem) %{ 3269 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3270 match(Set dst (ReplicateB (LoadB mem))); 3271 format %{ "punpcklbw $dst,$mem\n\t" 3272 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3273 ins_encode %{ 3274 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3275 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3276 %} 3277 ins_pipe( pipe_slow ); 3278 %} 3279 3280 instruct Repl8B_mem(vecD dst, memory mem) %{ 3281 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3282 match(Set dst (ReplicateB (LoadB mem))); 3283 format %{ "punpcklbw $dst,$mem\n\t" 3284 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3285 ins_encode %{ 3286 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3287 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3288 %} 3289 ins_pipe( pipe_slow ); 3290 %} 3291 3292 instruct Repl16B(vecX dst, rRegI src) %{ 3293 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3294 match(Set dst (ReplicateB src)); 3295 format %{ "movd $dst,$src\n\t" 3296 "punpcklbw $dst,$dst\n\t" 3297 "pshuflw $dst,$dst,0x00\n\t" 3298 "punpcklqdq $dst,$dst\t! 
replicate16B" %} 3299 ins_encode %{ 3300 __ movdl($dst$$XMMRegister, $src$$Register); 3301 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3302 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3303 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3304 %} 3305 ins_pipe( pipe_slow ); 3306 %} 3307 3308 instruct Repl16B_mem(vecX dst, memory mem) %{ 3309 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3310 match(Set dst (ReplicateB (LoadB mem))); 3311 format %{ "punpcklbw $dst,$mem\n\t" 3312 "pshuflw $dst,$dst,0x00\n\t" 3313 "punpcklqdq $dst,$dst\t! replicate16B" %} 3314 ins_encode %{ 3315 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3316 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3317 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3318 %} 3319 ins_pipe( pipe_slow ); 3320 %} 3321 3322 instruct Repl32B(vecY dst, rRegI src) %{ 3323 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3324 match(Set dst (ReplicateB src)); 3325 format %{ "movd $dst,$src\n\t" 3326 "punpcklbw $dst,$dst\n\t" 3327 "pshuflw $dst,$dst,0x00\n\t" 3328 "punpcklqdq $dst,$dst\n\t" 3329 "vinserti128_high $dst,$dst\t! replicate32B" %} 3330 ins_encode %{ 3331 __ movdl($dst$$XMMRegister, $src$$Register); 3332 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3333 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3334 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3335 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3336 %} 3337 ins_pipe( pipe_slow ); 3338 %} 3339 3340 instruct Repl32B_mem(vecY dst, memory mem) %{ 3341 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3342 match(Set dst (ReplicateB (LoadB mem))); 3343 format %{ "punpcklbw $dst,$mem\n\t" 3344 "pshuflw $dst,$dst,0x00\n\t" 3345 "punpcklqdq $dst,$dst\n\t" 3346 "vinserti128_high $dst,$dst\t! replicate32B" %} 3347 ins_encode %{ 3348 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3349 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3350 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3351 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3352 %} 3353 ins_pipe( pipe_slow ); 3354 %} 3355 3356 instruct Repl16B_imm(vecX dst, immI con) %{ 3357 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3358 match(Set dst (ReplicateB con)); 3359 format %{ "movq $dst,[$constantaddress]\n\t" 3360 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3361 ins_encode %{ 3362 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3363 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3364 %} 3365 ins_pipe( pipe_slow ); 3366 %} 3367 3368 instruct Repl32B_imm(vecY dst, immI con) %{ 3369 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3370 match(Set dst (ReplicateB con)); 3371 format %{ "movq $dst,[$constantaddress]\n\t" 3372 "punpcklqdq $dst,$dst\n\t" 3373 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3374 ins_encode %{ 3375 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3376 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3377 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3378 %} 3379 ins_pipe( pipe_slow ); 3380 %} 3381 3382 instruct Repl4S(vecD dst, rRegI src) %{ 3383 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3384 match(Set dst (ReplicateS src)); 3385 format %{ "movd $dst,$src\n\t" 3386 "pshuflw $dst,$dst,0x00\t! 
replicate4S" %} 3387 ins_encode %{ 3388 __ movdl($dst$$XMMRegister, $src$$Register); 3389 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3390 %} 3391 ins_pipe( pipe_slow ); 3392 %} 3393 3394 instruct Repl4S_mem(vecD dst, memory mem) %{ 3395 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3396 match(Set dst (ReplicateS (LoadS mem))); 3397 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3398 ins_encode %{ 3399 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3400 %} 3401 ins_pipe( pipe_slow ); 3402 %} 3403 3404 instruct Repl8S(vecX dst, rRegI src) %{ 3405 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3406 match(Set dst (ReplicateS src)); 3407 format %{ "movd $dst,$src\n\t" 3408 "pshuflw $dst,$dst,0x00\n\t" 3409 "punpcklqdq $dst,$dst\t! replicate8S" %} 3410 ins_encode %{ 3411 __ movdl($dst$$XMMRegister, $src$$Register); 3412 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3413 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3414 %} 3415 ins_pipe( pipe_slow ); 3416 %} 3417 3418 instruct Repl8S_mem(vecX dst, memory mem) %{ 3419 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3420 match(Set dst (ReplicateS (LoadS mem))); 3421 format %{ "pshuflw $dst,$mem,0x00\n\t" 3422 "punpcklqdq $dst,$dst\t! replicate8S" %} 3423 ins_encode %{ 3424 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3425 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3426 %} 3427 ins_pipe( pipe_slow ); 3428 %} 3429 3430 instruct Repl8S_imm(vecX dst, immI con) %{ 3431 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3432 match(Set dst (ReplicateS con)); 3433 format %{ "movq $dst,[$constantaddress]\n\t" 3434 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3435 ins_encode %{ 3436 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3437 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3438 %} 3439 ins_pipe( pipe_slow ); 3440 %} 3441 3442 instruct Repl16S(vecY dst, rRegI src) %{ 3443 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3444 match(Set dst (ReplicateS src)); 3445 format %{ "movd $dst,$src\n\t" 3446 "pshuflw $dst,$dst,0x00\n\t" 3447 "punpcklqdq $dst,$dst\n\t" 3448 "vinserti128_high $dst,$dst\t! replicate16S" %} 3449 ins_encode %{ 3450 __ movdl($dst$$XMMRegister, $src$$Register); 3451 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3452 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3453 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3454 %} 3455 ins_pipe( pipe_slow ); 3456 %} 3457 3458 instruct Repl16S_mem(vecY dst, memory mem) %{ 3459 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3460 match(Set dst (ReplicateS (LoadS mem))); 3461 format %{ "pshuflw $dst,$mem,0x00\n\t" 3462 "punpcklqdq $dst,$dst\n\t" 3463 "vinserti128_high $dst,$dst\t! replicate16S" %} 3464 ins_encode %{ 3465 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3466 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3467 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3468 %} 3469 ins_pipe( pipe_slow ); 3470 %} 3471 3472 instruct Repl16S_imm(vecY dst, immI con) %{ 3473 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3474 match(Set dst (ReplicateS con)); 3475 format %{ "movq $dst,[$constantaddress]\n\t" 3476 "punpcklqdq $dst,$dst\n\t" 3477 "vinserti128_high $dst,$dst\t! 
replicate16S($con)" %} 3478 ins_encode %{ 3479 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3480 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3481 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3482 %} 3483 ins_pipe( pipe_slow ); 3484 %} 3485 3486 instruct Repl4I(vecX dst, rRegI src) %{ 3487 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3488 match(Set dst (ReplicateI src)); 3489 format %{ "movd $dst,$src\n\t" 3490 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3491 ins_encode %{ 3492 __ movdl($dst$$XMMRegister, $src$$Register); 3493 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3494 %} 3495 ins_pipe( pipe_slow ); 3496 %} 3497 3498 instruct Repl4I_mem(vecX dst, memory mem) %{ 3499 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3500 match(Set dst (ReplicateI (LoadI mem))); 3501 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3502 ins_encode %{ 3503 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3504 %} 3505 ins_pipe( pipe_slow ); 3506 %} 3507 3508 instruct Repl8I(vecY dst, rRegI src) %{ 3509 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3510 match(Set dst (ReplicateI src)); 3511 format %{ "movd $dst,$src\n\t" 3512 "pshufd $dst,$dst,0x00\n\t" 3513 "vinserti128_high $dst,$dst\t! replicate8I" %} 3514 ins_encode %{ 3515 __ movdl($dst$$XMMRegister, $src$$Register); 3516 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3517 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3518 %} 3519 ins_pipe( pipe_slow ); 3520 %} 3521 3522 instruct Repl8I_mem(vecY dst, memory mem) %{ 3523 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3524 match(Set dst (ReplicateI (LoadI mem))); 3525 format %{ "pshufd $dst,$mem,0x00\n\t" 3526 "vinserti128_high $dst,$dst\t! replicate8I" %} 3527 ins_encode %{ 3528 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3529 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3530 %} 3531 ins_pipe( pipe_slow ); 3532 %} 3533 3534 instruct Repl4I_imm(vecX dst, immI con) %{ 3535 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3536 match(Set dst (ReplicateI con)); 3537 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3538 "punpcklqdq $dst,$dst" %} 3539 ins_encode %{ 3540 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3541 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3542 %} 3543 ins_pipe( pipe_slow ); 3544 %} 3545 3546 instruct Repl8I_imm(vecY dst, immI con) %{ 3547 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3548 match(Set dst (ReplicateI con)); 3549 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3550 "punpcklqdq $dst,$dst\n\t" 3551 "vinserti128_high $dst,$dst" %} 3552 ins_encode %{ 3553 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3554 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3555 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3556 %} 3557 ins_pipe( pipe_slow ); 3558 %} 3559 3560 // Long could be loaded into xmm register directly from memory. 3561 instruct Repl2L_mem(vecX dst, memory mem) %{ 3562 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3563 match(Set dst (ReplicateL (LoadL mem))); 3564 format %{ "movq $dst,$mem\n\t" 3565 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3566 ins_encode %{ 3567 __ movq($dst$$XMMRegister, $mem$$Address); 3568 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3569 %} 3570 ins_pipe( pipe_slow ); 3571 %} 3572 3573 // Replicate long (8 byte) scalar to be vector 3574 #ifdef _LP64 3575 instruct Repl4L(vecY dst, rRegL src) %{ 3576 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3577 match(Set dst (ReplicateL src)); 3578 format %{ "movdq $dst,$src\n\t" 3579 "punpcklqdq $dst,$dst\n\t" 3580 "vinserti128_high $dst,$dst\t! replicate4L" %} 3581 ins_encode %{ 3582 __ movdq($dst$$XMMRegister, $src$$Register); 3583 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3584 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3585 %} 3586 ins_pipe( pipe_slow ); 3587 %} 3588 #else // _LP64 3589 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3590 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3591 match(Set dst (ReplicateL src)); 3592 effect(TEMP dst, USE src, TEMP tmp); 3593 format %{ "movdl $dst,$src.lo\n\t" 3594 "movdl $tmp,$src.hi\n\t" 3595 "punpckldq $dst,$tmp\n\t" 3596 "punpcklqdq $dst,$dst\n\t" 3597 "vinserti128_high $dst,$dst\t! replicate4L" %} 3598 ins_encode %{ 3599 __ movdl($dst$$XMMRegister, $src$$Register); 3600 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3601 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3602 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3603 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3604 %} 3605 ins_pipe( pipe_slow ); 3606 %} 3607 #endif // _LP64 3608 3609 instruct Repl4L_imm(vecY dst, immL con) %{ 3610 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3611 match(Set dst (ReplicateL con)); 3612 format %{ "movq $dst,[$constantaddress]\n\t" 3613 "punpcklqdq $dst,$dst\n\t" 3614 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3615 ins_encode %{ 3616 __ movq($dst$$XMMRegister, $constantaddress($con)); 3617 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3618 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3619 %} 3620 ins_pipe( pipe_slow ); 3621 %} 3622 3623 instruct Repl4L_mem(vecY dst, memory mem) %{ 3624 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3625 match(Set dst (ReplicateL (LoadL mem))); 3626 format %{ "movq $dst,$mem\n\t" 3627 "punpcklqdq $dst,$dst\n\t" 3628 "vinserti128_high $dst,$dst\t! replicate4L" %} 3629 ins_encode %{ 3630 __ movq($dst$$XMMRegister, $mem$$Address); 3631 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3632 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3633 %} 3634 ins_pipe( pipe_slow ); 3635 %} 3636 3637 instruct Repl2F_mem(vecD dst, memory mem) %{ 3638 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3639 match(Set dst (ReplicateF (LoadF mem))); 3640 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3641 ins_encode %{ 3642 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3643 %} 3644 ins_pipe( pipe_slow ); 3645 %} 3646 3647 instruct Repl4F_mem(vecX dst, memory mem) %{ 3648 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3649 match(Set dst (ReplicateF (LoadF mem))); 3650 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4F" %} 3651 ins_encode %{ 3652 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3653 %} 3654 ins_pipe( pipe_slow ); 3655 %} 3656 3657 instruct Repl8F(vecY dst, regF src) %{ 3658 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3659 match(Set dst (ReplicateF src)); 3660 format %{ "pshufd $dst,$src,0x00\n\t" 3661 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3662 ins_encode %{ 3663 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3664 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3665 %} 3666 ins_pipe( pipe_slow ); 3667 %} 3668 3669 instruct Repl8F_mem(vecY dst, memory mem) %{ 3670 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3671 match(Set dst (ReplicateF (LoadF mem))); 3672 format %{ "pshufd $dst,$mem,0x00\n\t" 3673 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3674 ins_encode %{ 3675 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3676 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3677 %} 3678 ins_pipe( pipe_slow ); 3679 %} 3680 3681 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3682 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3683 match(Set dst (ReplicateF zero)); 3684 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3685 ins_encode %{ 3686 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3687 %} 3688 ins_pipe( fpu_reg_reg ); 3689 %} 3690 3691 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3692 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3693 match(Set dst (ReplicateF zero)); 3694 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3695 ins_encode %{ 3696 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3697 %} 3698 ins_pipe( fpu_reg_reg ); 3699 %} 3700 3701 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3702 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3703 match(Set dst (ReplicateF zero)); 3704 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3705 ins_encode %{ 3706 int vector_len = 1; 3707 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3708 %} 3709 ins_pipe( fpu_reg_reg ); 3710 %} 3711 3712 instruct Repl2D_mem(vecX dst, memory mem) %{ 3713 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3714 match(Set dst (ReplicateD (LoadD mem))); 3715 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3716 ins_encode %{ 3717 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3718 %} 3719 ins_pipe( pipe_slow ); 3720 %} 3721 3722 instruct Repl4D(vecY dst, regD src) %{ 3723 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3724 match(Set dst (ReplicateD src)); 3725 format %{ "pshufd $dst,$src,0x44\n\t" 3726 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3727 ins_encode %{ 3728 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3729 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3730 %} 3731 ins_pipe( pipe_slow ); 3732 %} 3733 3734 instruct Repl4D_mem(vecY dst, memory mem) %{ 3735 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3736 match(Set dst (ReplicateD (LoadD mem))); 3737 format %{ "pshufd $dst,$mem,0x44\n\t" 3738 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3739 ins_encode %{ 3740 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3741 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3742 %} 3743 ins_pipe( pipe_slow ); 3744 %} 3745 3746 // Replicate double (8 byte) scalar zero to be vector 3747 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3748 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3749 match(Set dst (ReplicateD zero)); 3750 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3751 ins_encode %{ 3752 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3753 %} 3754 ins_pipe( fpu_reg_reg ); 3755 %} 3756 3757 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3758 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3759 match(Set dst (ReplicateD zero)); 3760 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3761 ins_encode %{ 3762 int vector_len = 1; 3763 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3764 %} 3765 ins_pipe( fpu_reg_reg ); 3766 %} 3767 3768 // ====================GENERIC REPLICATE========================================== 3769 3770 // Replicate byte scalar to be vector 3771 instruct Repl4B(vecS dst, rRegI src) %{ 3772 predicate(n->as_Vector()->length() == 4); 3773 match(Set dst (ReplicateB src)); 3774 format %{ "movd $dst,$src\n\t" 3775 "punpcklbw $dst,$dst\n\t" 3776 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3777 ins_encode %{ 3778 __ movdl($dst$$XMMRegister, $src$$Register); 3779 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3780 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3781 %} 3782 ins_pipe( pipe_slow ); 3783 %} 3784 3785 instruct Repl8B(vecD dst, rRegI src) %{ 3786 predicate(n->as_Vector()->length() == 8); 3787 match(Set dst (ReplicateB src)); 3788 format %{ "movd $dst,$src\n\t" 3789 "punpcklbw $dst,$dst\n\t" 3790 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3791 ins_encode %{ 3792 __ movdl($dst$$XMMRegister, $src$$Register); 3793 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3794 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3795 %} 3796 ins_pipe( pipe_slow ); 3797 %} 3798 3799 // Replicate byte scalar immediate to be vector by loading from const table. 3800 instruct Repl4B_imm(vecS dst, immI con) %{ 3801 predicate(n->as_Vector()->length() == 4); 3802 match(Set dst (ReplicateB con)); 3803 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3804 ins_encode %{ 3805 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3806 %} 3807 ins_pipe( pipe_slow ); 3808 %} 3809 3810 instruct Repl8B_imm(vecD dst, immI con) %{ 3811 predicate(n->as_Vector()->length() == 8); 3812 match(Set dst (ReplicateB con)); 3813 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3814 ins_encode %{ 3815 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 3820 // Replicate byte scalar zero to be vector 3821 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3822 predicate(n->as_Vector()->length() == 4); 3823 match(Set dst (ReplicateB zero)); 3824 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3825 ins_encode %{ 3826 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3827 %} 3828 ins_pipe( fpu_reg_reg ); 3829 %} 3830 3831 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3832 predicate(n->as_Vector()->length() == 8); 3833 match(Set dst (ReplicateB zero)); 3834 format %{ "pxor $dst,$dst\t! 
replicate8B zero" %}
3835 ins_encode %{
3836 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3837 %}
3838 ins_pipe( fpu_reg_reg );
3839 %}
3840
3841 instruct Repl16B_zero(vecX dst, immI0 zero) %{
3842 predicate(n->as_Vector()->length() == 16);
3843 match(Set dst (ReplicateB zero));
3844 format %{ "pxor $dst,$dst\t! replicate16B zero" %}
3845 ins_encode %{
3846 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3847 %}
3848 ins_pipe( fpu_reg_reg );
3849 %}
3850
3851 instruct Repl32B_zero(vecY dst, immI0 zero) %{
3852 predicate(n->as_Vector()->length() == 32);
3853 match(Set dst (ReplicateB zero));
3854 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
3855 ins_encode %{
3856 // Use vpxor; the 256-bit form requires AVX2.
3857 int vector_len = 1;
3858 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3859 %}
3860 ins_pipe( fpu_reg_reg );
3861 %}
3862
3863 // Replicate char/short (2 byte) scalar to be vector
3864 instruct Repl2S(vecS dst, rRegI src) %{
3865 predicate(n->as_Vector()->length() == 2);
3866 match(Set dst (ReplicateS src));
3867 format %{ "movd $dst,$src\n\t"
3868 "pshuflw $dst,$dst,0x00\t! replicate2S" %}
3869 ins_encode %{
3870 __ movdl($dst$$XMMRegister, $src$$Register);
3871 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3872 %}
3873 ins_pipe( fpu_reg_reg );
3874 %}
3875
3876 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
3877 instruct Repl2S_imm(vecS dst, immI con) %{
3878 predicate(n->as_Vector()->length() == 2);
3879 match(Set dst (ReplicateS con));
3880 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
3881 ins_encode %{
3882 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
3883 %}
3884 ins_pipe( fpu_reg_reg );
3885 %}
3886
3887 instruct Repl4S_imm(vecD dst, immI con) %{
3888 predicate(n->as_Vector()->length() == 4);
3889 match(Set dst (ReplicateS con));
3890 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
3891 ins_encode %{
3892 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3893 %}
3894 ins_pipe( fpu_reg_reg );
3895 %}
3896
3897 // Replicate char/short (2 byte) scalar zero to be vector
3898 instruct Repl2S_zero(vecS dst, immI0 zero) %{
3899 predicate(n->as_Vector()->length() == 2);
3900 match(Set dst (ReplicateS zero));
3901 format %{ "pxor $dst,$dst\t! replicate2S zero" %}
3902 ins_encode %{
3903 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3904 %}
3905 ins_pipe( fpu_reg_reg );
3906 %}
3907
3908 instruct Repl4S_zero(vecD dst, immI0 zero) %{
3909 predicate(n->as_Vector()->length() == 4);
3910 match(Set dst (ReplicateS zero));
3911 format %{ "pxor $dst,$dst\t! replicate4S zero" %}
3912 ins_encode %{
3913 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3914 %}
3915 ins_pipe( fpu_reg_reg );
3916 %}
3917
3918 instruct Repl8S_zero(vecX dst, immI0 zero) %{
3919 predicate(n->as_Vector()->length() == 8);
3920 match(Set dst (ReplicateS zero));
3921 format %{ "pxor $dst,$dst\t! replicate8S zero" %}
3922 ins_encode %{
3923 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3924 %}
3925 ins_pipe( fpu_reg_reg );
3926 %}
3927
3928 instruct Repl16S_zero(vecY dst, immI0 zero) %{
3929 predicate(n->as_Vector()->length() == 16);
3930 match(Set dst (ReplicateS zero));
3931 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
3932 ins_encode %{
3933 // Use vpxor; the 256-bit form requires AVX2.
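// vector_len selects the operand width encoded by the assembler and follows
// the convention used throughout this file: 0 = 128-bit (xmm),
// 1 = 256-bit (ymm), 2 = 512-bit (zmm).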
3934 int vector_len = 1;
3935 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3936 %}
3937 ins_pipe( fpu_reg_reg );
3938 %}
3939
3940 // Replicate integer (4 byte) scalar to be vector
3941 instruct Repl2I(vecD dst, rRegI src) %{
3942 predicate(n->as_Vector()->length() == 2);
3943 match(Set dst (ReplicateI src));
3944 format %{ "movd $dst,$src\n\t"
3945 "pshufd $dst,$dst,0x00\t! replicate2I" %}
3946 ins_encode %{
3947 __ movdl($dst$$XMMRegister, $src$$Register);
3948 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3949 %}
3950 ins_pipe( fpu_reg_reg );
3951 %}
3952
3953 // Integer could be loaded into xmm register directly from memory.
3954 instruct Repl2I_mem(vecD dst, memory mem) %{
3955 predicate(n->as_Vector()->length() == 2);
3956 match(Set dst (ReplicateI (LoadI mem)));
3957 format %{ "movd $dst,$mem\n\t"
3958 "pshufd $dst,$dst,0x00\t! replicate2I" %}
3959 ins_encode %{
3960 __ movdl($dst$$XMMRegister, $mem$$Address);
3961 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3962 %}
3963 ins_pipe( fpu_reg_reg );
3964 %}
3965
3966 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
3967 instruct Repl2I_imm(vecD dst, immI con) %{
3968 predicate(n->as_Vector()->length() == 2);
3969 match(Set dst (ReplicateI con));
3970 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
3971 ins_encode %{
3972 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3973 %}
3974 ins_pipe( fpu_reg_reg );
3975 %}
3976
3977 // Replicate integer (4 byte) scalar zero to be vector
3978 instruct Repl2I_zero(vecD dst, immI0 zero) %{
3979 predicate(n->as_Vector()->length() == 2);
3980 match(Set dst (ReplicateI zero));
3981 format %{ "pxor $dst,$dst\t! replicate2I zero" %}
3982 ins_encode %{
3983 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3984 %}
3985 ins_pipe( fpu_reg_reg );
3986 %}
3987
3988 instruct Repl4I_zero(vecX dst, immI0 zero) %{
3989 predicate(n->as_Vector()->length() == 4);
3990 match(Set dst (ReplicateI zero));
3991 format %{ "pxor $dst,$dst\t! replicate4I zero" %}
3992 ins_encode %{
3993 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3994 %}
3995 ins_pipe( fpu_reg_reg );
3996 %}
3997
3998 instruct Repl8I_zero(vecY dst, immI0 zero) %{
3999 predicate(n->as_Vector()->length() == 8);
4000 match(Set dst (ReplicateI zero));
4001 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
4002 ins_encode %{
4003 // Use vpxor; the 256-bit form requires AVX2.
4004 int vector_len = 1;
4005 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4006 %}
4007 ins_pipe( fpu_reg_reg );
4008 %}
4009
4010 // Replicate long (8 byte) scalar to be vector
4011 #ifdef _LP64
4012 instruct Repl2L(vecX dst, rRegL src) %{
4013 predicate(n->as_Vector()->length() == 2);
4014 match(Set dst (ReplicateL src));
4015 format %{ "movdq $dst,$src\n\t"
4016 "punpcklqdq $dst,$dst\t! replicate2L" %}
4017 ins_encode %{
4018 __ movdq($dst$$XMMRegister, $src$$Register);
4019 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4020 %}
4021 ins_pipe( pipe_slow );
4022 %}
4023 #else // _LP64
4024 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
4025 predicate(n->as_Vector()->length() == 2);
4026 match(Set dst (ReplicateL src));
4027 effect(TEMP dst, USE src, TEMP tmp);
4028 format %{ "movdl $dst,$src.lo\n\t"
4029 "movdl $tmp,$src.hi\n\t"
4030 "punpckldq $dst,$tmp\n\t"
4031 "punpcklqdq $dst,$dst\t!
replicate2L"%} 4032 ins_encode %{ 4033 __ movdl($dst$$XMMRegister, $src$$Register); 4034 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4035 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4036 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4037 %} 4038 ins_pipe( pipe_slow ); 4039 %} 4040 #endif // _LP64 4041 4042 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4043 instruct Repl2L_imm(vecX dst, immL con) %{ 4044 predicate(n->as_Vector()->length() == 2); 4045 match(Set dst (ReplicateL con)); 4046 format %{ "movq $dst,[$constantaddress]\n\t" 4047 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 4048 ins_encode %{ 4049 __ movq($dst$$XMMRegister, $constantaddress($con)); 4050 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4051 %} 4052 ins_pipe( pipe_slow ); 4053 %} 4054 4055 // Replicate long (8 byte) scalar zero to be vector 4056 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 4057 predicate(n->as_Vector()->length() == 2); 4058 match(Set dst (ReplicateL zero)); 4059 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 4060 ins_encode %{ 4061 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4062 %} 4063 ins_pipe( fpu_reg_reg ); 4064 %} 4065 4066 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 4067 predicate(n->as_Vector()->length() == 4); 4068 match(Set dst (ReplicateL zero)); 4069 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 4070 ins_encode %{ 4071 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4072 int vector_len = 1; 4073 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4074 %} 4075 ins_pipe( fpu_reg_reg ); 4076 %} 4077 4078 // Replicate float (4 byte) scalar to be vector 4079 instruct Repl2F(vecD dst, regF src) %{ 4080 predicate(n->as_Vector()->length() == 2); 4081 match(Set dst (ReplicateF src)); 4082 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 4083 ins_encode %{ 4084 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4085 %} 4086 ins_pipe( fpu_reg_reg ); 4087 %} 4088 4089 instruct Repl4F(vecX dst, regF src) %{ 4090 predicate(n->as_Vector()->length() == 4); 4091 match(Set dst (ReplicateF src)); 4092 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 4093 ins_encode %{ 4094 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4095 %} 4096 ins_pipe( pipe_slow ); 4097 %} 4098 4099 // Replicate double (8 bytes) scalar to be vector 4100 instruct Repl2D(vecX dst, regD src) %{ 4101 predicate(n->as_Vector()->length() == 2); 4102 match(Set dst (ReplicateD src)); 4103 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 4104 ins_encode %{ 4105 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4106 %} 4107 ins_pipe( pipe_slow ); 4108 %} 4109 4110 // ====================EVEX REPLICATE============================================= 4111 4112 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4113 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4114 match(Set dst (ReplicateB (LoadB mem))); 4115 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4116 ins_encode %{ 4117 int vector_len = 0; 4118 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4119 %} 4120 ins_pipe( pipe_slow ); 4121 %} 4122 4123 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4124 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4125 match(Set dst (ReplicateB (LoadB mem))); 4126 format %{ "vpbroadcastb $dst,$mem\t! 
replicate8B" %} 4127 ins_encode %{ 4128 int vector_len = 0; 4129 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4130 %} 4131 ins_pipe( pipe_slow ); 4132 %} 4133 4134 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4135 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4136 match(Set dst (ReplicateB src)); 4137 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 4138 ins_encode %{ 4139 int vector_len = 0; 4140 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4141 %} 4142 ins_pipe( pipe_slow ); 4143 %} 4144 4145 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4146 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4147 match(Set dst (ReplicateB (LoadB mem))); 4148 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4149 ins_encode %{ 4150 int vector_len = 0; 4151 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4152 %} 4153 ins_pipe( pipe_slow ); 4154 %} 4155 4156 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4157 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4158 match(Set dst (ReplicateB src)); 4159 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4160 ins_encode %{ 4161 int vector_len = 1; 4162 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4163 %} 4164 ins_pipe( pipe_slow ); 4165 %} 4166 4167 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4168 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4169 match(Set dst (ReplicateB (LoadB mem))); 4170 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4171 ins_encode %{ 4172 int vector_len = 1; 4173 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4174 %} 4175 ins_pipe( pipe_slow ); 4176 %} 4177 4178 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4179 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4180 match(Set dst (ReplicateB src)); 4181 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 4182 ins_encode %{ 4183 int vector_len = 2; 4184 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4185 %} 4186 ins_pipe( pipe_slow ); 4187 %} 4188 4189 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4190 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4191 match(Set dst (ReplicateB (LoadB mem))); 4192 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4193 ins_encode %{ 4194 int vector_len = 2; 4195 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4196 %} 4197 ins_pipe( pipe_slow ); 4198 %} 4199 4200 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4201 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4202 match(Set dst (ReplicateB con)); 4203 format %{ "movq $dst,[$constantaddress]\n\t" 4204 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4205 ins_encode %{ 4206 int vector_len = 0; 4207 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4208 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4209 %} 4210 ins_pipe( pipe_slow ); 4211 %} 4212 4213 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4214 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4215 match(Set dst (ReplicateB con)); 4216 format %{ "movq $dst,[$constantaddress]\n\t" 4217 "vpbroadcastb $dst,$dst\t! 
replicate32B" %} 4218 ins_encode %{ 4219 int vector_len = 1; 4220 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4221 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4222 %} 4223 ins_pipe( pipe_slow ); 4224 %} 4225 4226 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4227 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4228 match(Set dst (ReplicateB con)); 4229 format %{ "movq $dst,[$constantaddress]\n\t" 4230 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4231 ins_encode %{ 4232 int vector_len = 2; 4233 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4234 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4235 %} 4236 ins_pipe( pipe_slow ); 4237 %} 4238 4239 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4240 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4241 match(Set dst (ReplicateB zero)); 4242 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4243 ins_encode %{ 4244 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4245 int vector_len = 2; 4246 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4247 %} 4248 ins_pipe( fpu_reg_reg ); 4249 %} 4250 4251 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4252 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4253 match(Set dst (ReplicateS src)); 4254 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 4255 ins_encode %{ 4256 int vector_len = 0; 4257 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4258 %} 4259 ins_pipe( pipe_slow ); 4260 %} 4261 4262 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4263 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4264 match(Set dst (ReplicateS (LoadS mem))); 4265 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4266 ins_encode %{ 4267 int vector_len = 0; 4268 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4269 %} 4270 ins_pipe( pipe_slow ); 4271 %} 4272 4273 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4274 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4275 match(Set dst (ReplicateS src)); 4276 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4277 ins_encode %{ 4278 int vector_len = 0; 4279 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4280 %} 4281 ins_pipe( pipe_slow ); 4282 %} 4283 4284 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4285 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4286 match(Set dst (ReplicateS (LoadS mem))); 4287 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4288 ins_encode %{ 4289 int vector_len = 0; 4290 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4291 %} 4292 ins_pipe( pipe_slow ); 4293 %} 4294 4295 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4296 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4297 match(Set dst (ReplicateS src)); 4298 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4299 ins_encode %{ 4300 int vector_len = 1; 4301 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4302 %} 4303 ins_pipe( pipe_slow ); 4304 %} 4305 4306 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4307 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4308 match(Set dst (ReplicateS (LoadS mem))); 4309 format %{ "vpbroadcastw $dst,$mem\t! 
replicate16S" %} 4310 ins_encode %{ 4311 int vector_len = 1; 4312 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4313 %} 4314 ins_pipe( pipe_slow ); 4315 %} 4316 4317 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4318 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4319 match(Set dst (ReplicateS src)); 4320 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4321 ins_encode %{ 4322 int vector_len = 2; 4323 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4324 %} 4325 ins_pipe( pipe_slow ); 4326 %} 4327 4328 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4329 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4330 match(Set dst (ReplicateS (LoadS mem))); 4331 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4332 ins_encode %{ 4333 int vector_len = 2; 4334 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4335 %} 4336 ins_pipe( pipe_slow ); 4337 %} 4338 4339 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4340 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4341 match(Set dst (ReplicateS con)); 4342 format %{ "movq $dst,[$constantaddress]\n\t" 4343 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4344 ins_encode %{ 4345 int vector_len = 0; 4346 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4347 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4348 %} 4349 ins_pipe( pipe_slow ); 4350 %} 4351 4352 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4353 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4354 match(Set dst (ReplicateS con)); 4355 format %{ "movq $dst,[$constantaddress]\n\t" 4356 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4357 ins_encode %{ 4358 int vector_len = 1; 4359 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4360 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4361 %} 4362 ins_pipe( pipe_slow ); 4363 %} 4364 4365 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4366 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4367 match(Set dst (ReplicateS con)); 4368 format %{ "movq $dst,[$constantaddress]\n\t" 4369 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4370 ins_encode %{ 4371 int vector_len = 2; 4372 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4373 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4374 %} 4375 ins_pipe( pipe_slow ); 4376 %} 4377 4378 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4379 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4380 match(Set dst (ReplicateS zero)); 4381 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4382 ins_encode %{ 4383 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4384 int vector_len = 2; 4385 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4386 %} 4387 ins_pipe( fpu_reg_reg ); 4388 %} 4389 4390 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4391 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4392 match(Set dst (ReplicateI src)); 4393 format %{ "vpbroadcastd $dst,$src\t! 
replicate4I" %} 4394 ins_encode %{ 4395 int vector_len = 0; 4396 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4397 %} 4398 ins_pipe( pipe_slow ); 4399 %} 4400 4401 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4402 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4403 match(Set dst (ReplicateI (LoadI mem))); 4404 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4405 ins_encode %{ 4406 int vector_len = 0; 4407 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4408 %} 4409 ins_pipe( pipe_slow ); 4410 %} 4411 4412 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4413 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4414 match(Set dst (ReplicateI src)); 4415 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4416 ins_encode %{ 4417 int vector_len = 1; 4418 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4419 %} 4420 ins_pipe( pipe_slow ); 4421 %} 4422 4423 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4424 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4425 match(Set dst (ReplicateI (LoadI mem))); 4426 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4427 ins_encode %{ 4428 int vector_len = 1; 4429 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4430 %} 4431 ins_pipe( pipe_slow ); 4432 %} 4433 4434 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4435 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4436 match(Set dst (ReplicateI src)); 4437 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4438 ins_encode %{ 4439 int vector_len = 2; 4440 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4441 %} 4442 ins_pipe( pipe_slow ); 4443 %} 4444 4445 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4446 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4447 match(Set dst (ReplicateI (LoadI mem))); 4448 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4449 ins_encode %{ 4450 int vector_len = 2; 4451 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4452 %} 4453 ins_pipe( pipe_slow ); 4454 %} 4455 4456 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4457 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4458 match(Set dst (ReplicateI con)); 4459 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4460 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4461 ins_encode %{ 4462 int vector_len = 0; 4463 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4464 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4465 %} 4466 ins_pipe( pipe_slow ); 4467 %} 4468 4469 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4470 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4471 match(Set dst (ReplicateI con)); 4472 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4473 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4474 ins_encode %{ 4475 int vector_len = 1; 4476 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4477 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4478 %} 4479 ins_pipe( pipe_slow ); 4480 %} 4481 4482 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4483 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4484 match(Set dst (ReplicateI con)); 4485 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4486 "vpbroadcastd $dst,$dst\t! 
replicate16I" %} 4487 ins_encode %{ 4488 int vector_len = 2; 4489 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4490 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4496 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4497 match(Set dst (ReplicateI zero)); 4498 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4499 ins_encode %{ 4500 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4501 int vector_len = 2; 4502 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4503 %} 4504 ins_pipe( fpu_reg_reg ); 4505 %} 4506 4507 // Replicate long (8 byte) scalar to be vector 4508 #ifdef _LP64 4509 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4510 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4511 match(Set dst (ReplicateL src)); 4512 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4513 ins_encode %{ 4514 int vector_len = 1; 4515 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4516 %} 4517 ins_pipe( pipe_slow ); 4518 %} 4519 4520 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4521 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4522 match(Set dst (ReplicateL src)); 4523 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4524 ins_encode %{ 4525 int vector_len = 2; 4526 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4527 %} 4528 ins_pipe( pipe_slow ); 4529 %} 4530 #else // _LP64 4531 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4532 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4533 match(Set dst (ReplicateL src)); 4534 effect(TEMP dst, USE src, TEMP tmp); 4535 format %{ "movdl $dst,$src.lo\n\t" 4536 "movdl $tmp,$src.hi\n\t" 4537 "punpckldq $dst,$tmp\n\t" 4538 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4539 ins_encode %{ 4540 int vector_len = 1; 4541 __ movdl($dst$$XMMRegister, $src$$Register); 4542 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4543 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4544 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4545 %} 4546 ins_pipe( pipe_slow ); 4547 %} 4548 4549 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4550 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4551 match(Set dst (ReplicateL src)); 4552 effect(TEMP dst, USE src, TEMP tmp); 4553 format %{ "movdl $dst,$src.lo\n\t" 4554 "movdl $tmp,$src.hi\n\t" 4555 "punpckldq $dst,$tmp\n\t" 4556 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4557 ins_encode %{ 4558 int vector_len = 2; 4559 __ movdl($dst$$XMMRegister, $src$$Register); 4560 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4561 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4562 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4563 %} 4564 ins_pipe( pipe_slow ); 4565 %} 4566 #endif // _LP64 4567 4568 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4569 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4570 match(Set dst (ReplicateL con)); 4571 format %{ "movq $dst,[$constantaddress]\n\t" 4572 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4573 ins_encode %{ 4574 int vector_len = 1; 4575 __ movq($dst$$XMMRegister, $constantaddress($con)); 4576 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4577 %} 4578 ins_pipe( pipe_slow ); 4579 %} 4580 4581 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4582 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4583 match(Set dst (ReplicateL con)); 4584 format %{ "movq $dst,[$constantaddress]\n\t" 4585 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4586 ins_encode %{ 4587 int vector_len = 2; 4588 __ movq($dst$$XMMRegister, $constantaddress($con)); 4589 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4590 %} 4591 ins_pipe( pipe_slow ); 4592 %} 4593 4594 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4595 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4596 match(Set dst (ReplicateL (LoadL mem))); 4597 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4598 ins_encode %{ 4599 int vector_len = 0; 4600 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4601 %} 4602 ins_pipe( pipe_slow ); 4603 %} 4604 4605 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4606 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4607 match(Set dst (ReplicateL (LoadL mem))); 4608 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4609 ins_encode %{ 4610 int vector_len = 1; 4611 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4612 %} 4613 ins_pipe( pipe_slow ); 4614 %} 4615 4616 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4617 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4618 match(Set dst (ReplicateL (LoadL mem))); 4619 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4620 ins_encode %{ 4621 int vector_len = 2; 4622 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4623 %} 4624 ins_pipe( pipe_slow ); 4625 %} 4626 4627 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4628 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4629 match(Set dst (ReplicateL zero)); 4630 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4631 ins_encode %{ 4632 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4633 int vector_len = 2; 4634 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4635 %} 4636 ins_pipe( fpu_reg_reg ); 4637 %} 4638 4639 instruct Repl8F_evex(vecY dst, regF src) %{ 4640 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4641 match(Set dst (ReplicateF src)); 4642 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4643 ins_encode %{ 4644 int vector_len = 1; 4645 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4646 %} 4647 ins_pipe( pipe_slow ); 4648 %} 4649 4650 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4651 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4652 match(Set dst (ReplicateF (LoadF mem))); 4653 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4654 ins_encode %{ 4655 int vector_len = 1; 4656 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4657 %} 4658 ins_pipe( pipe_slow ); 4659 %} 4660 4661 instruct Repl16F_evex(vecZ dst, regF src) %{ 4662 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4663 match(Set dst (ReplicateF src)); 4664 format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %} 4665 ins_encode %{ 4666 int vector_len = 2; 4667 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4668 %} 4669 ins_pipe( pipe_slow ); 4670 %} 4671 4672 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4673 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4674 match(Set dst (ReplicateF (LoadF mem))); 4675 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4676 ins_encode %{ 4677 int vector_len = 2; 4678 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4679 %} 4680 ins_pipe( pipe_slow ); 4681 %} 4682 4683 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4684 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4685 match(Set dst (ReplicateF zero)); 4686 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4687 ins_encode %{ 4688 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4689 int vector_len = 2; 4690 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4691 %} 4692 ins_pipe( fpu_reg_reg ); 4693 %} 4694 4695 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4696 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4697 match(Set dst (ReplicateF zero)); 4698 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4699 ins_encode %{ 4700 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4701 int vector_len = 2; 4702 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4703 %} 4704 ins_pipe( fpu_reg_reg ); 4705 %} 4706 4707 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4708 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4709 match(Set dst (ReplicateF zero)); 4710 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4711 ins_encode %{ 4712 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4713 int vector_len = 2; 4714 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4715 %} 4716 ins_pipe( fpu_reg_reg ); 4717 %} 4718 4719 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4720 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4721 match(Set dst (ReplicateF zero)); 4722 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4723 ins_encode %{ 4724 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4725 int vector_len = 2; 4726 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4727 %} 4728 ins_pipe( fpu_reg_reg ); 4729 %} 4730 4731 instruct Repl4D_evex(vecY dst, regD src) %{ 4732 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4733 match(Set dst (ReplicateD src)); 4734 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4735 ins_encode %{ 4736 int vector_len = 1; 4737 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4738 %} 4739 ins_pipe( pipe_slow ); 4740 %} 4741 4742 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4743 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4744 match(Set dst (ReplicateD (LoadD mem))); 4745 format %{ "vbroadcastsd $dst,$mem\t! 
replicate4D" %} 4746 ins_encode %{ 4747 int vector_len = 1; 4748 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4749 %} 4750 ins_pipe( pipe_slow ); 4751 %} 4752 4753 instruct Repl8D_evex(vecZ dst, regD src) %{ 4754 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4755 match(Set dst (ReplicateD src)); 4756 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4757 ins_encode %{ 4758 int vector_len = 2; 4759 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4760 %} 4761 ins_pipe( pipe_slow ); 4762 %} 4763 4764 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4765 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4766 match(Set dst (ReplicateD (LoadD mem))); 4767 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4768 ins_encode %{ 4769 int vector_len = 2; 4770 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4771 %} 4772 ins_pipe( pipe_slow ); 4773 %} 4774 4775 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4776 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4777 match(Set dst (ReplicateD zero)); 4778 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4779 ins_encode %{ 4780 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4781 int vector_len = 2; 4782 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4783 %} 4784 ins_pipe( fpu_reg_reg ); 4785 %} 4786 4787 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4788 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4789 match(Set dst (ReplicateD zero)); 4790 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4791 ins_encode %{ 4792 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4793 int vector_len = 2; 4794 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4795 %} 4796 ins_pipe( fpu_reg_reg ); 4797 %} 4798 4799 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4800 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4801 match(Set dst (ReplicateD zero)); 4802 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4803 ins_encode %{ 4804 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4805 int vector_len = 2; 4806 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4807 %} 4808 ins_pipe( fpu_reg_reg ); 4809 %} 4810 4811 // ====================REDUCTION ARITHMETIC======================================= 4812 4813 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4814 predicate(UseSSE > 2 && UseAVX == 0); 4815 match(Set dst (AddReductionVI src1 src2)); 4816 effect(TEMP tmp2, TEMP tmp); 4817 format %{ "movdqu $tmp2,$src2\n\t" 4818 "phaddd $tmp2,$tmp2\n\t" 4819 "movd $tmp,$src1\n\t" 4820 "paddd $tmp,$tmp2\n\t" 4821 "movd $dst,$tmp\t! 
add reduction2I" %} 4822 ins_encode %{ 4823 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4824 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4825 __ movdl($tmp$$XMMRegister, $src1$$Register); 4826 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4827 __ movdl($dst$$Register, $tmp$$XMMRegister); 4828 %} 4829 ins_pipe( pipe_slow ); 4830 %} 4831 4832 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4833 predicate(VM_Version::supports_avxonly()); 4834 match(Set dst (AddReductionVI src1 src2)); 4835 effect(TEMP tmp, TEMP tmp2); 4836 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4837 "movd $tmp2,$src1\n\t" 4838 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4839 "movd $dst,$tmp2\t! add reduction2I" %} 4840 ins_encode %{ 4841 int vector_len = 0; 4842 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4843 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4844 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4845 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4846 %} 4847 ins_pipe( pipe_slow ); 4848 %} 4849 4850 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4851 predicate(UseAVX > 2); 4852 match(Set dst (AddReductionVI src1 src2)); 4853 effect(TEMP tmp, TEMP tmp2); 4854 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4855 "vpaddd $tmp,$src2,$tmp2\n\t" 4856 "movd $tmp2,$src1\n\t" 4857 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4858 "movd $dst,$tmp2\t! add reduction2I" %} 4859 ins_encode %{ 4860 int vector_len = 0; 4861 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4862 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4863 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4864 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4865 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4866 %} 4867 ins_pipe( pipe_slow ); 4868 %} 4869 4870 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4871 predicate(UseSSE > 2 && UseAVX == 0); 4872 match(Set dst (AddReductionVI src1 src2)); 4873 effect(TEMP tmp, TEMP tmp2); 4874 format %{ "movdqu $tmp,$src2\n\t" 4875 "phaddd $tmp,$tmp\n\t" 4876 "phaddd $tmp,$tmp\n\t" 4877 "movd $tmp2,$src1\n\t" 4878 "paddd $tmp2,$tmp\n\t" 4879 "movd $dst,$tmp2\t! add reduction4I" %} 4880 ins_encode %{ 4881 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4882 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4883 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4884 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4885 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4886 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4887 %} 4888 ins_pipe( pipe_slow ); 4889 %} 4890 4891 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4892 predicate(VM_Version::supports_avxonly()); 4893 match(Set dst (AddReductionVI src1 src2)); 4894 effect(TEMP tmp, TEMP tmp2); 4895 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4896 "vphaddd $tmp,$tmp,$tmp\n\t" 4897 "movd $tmp2,$src1\n\t" 4898 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4899 "movd $dst,$tmp2\t! 
add reduction4I" %} 4900 ins_encode %{ 4901 int vector_len = 0; 4902 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4903 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4904 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4905 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4906 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4907 %} 4908 ins_pipe( pipe_slow ); 4909 %} 4910 4911 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4912 predicate(UseAVX > 2); 4913 match(Set dst (AddReductionVI src1 src2)); 4914 effect(TEMP tmp, TEMP tmp2); 4915 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4916 "vpaddd $tmp,$src2,$tmp2\n\t" 4917 "pshufd $tmp2,$tmp,0x1\n\t" 4918 "vpaddd $tmp,$tmp,$tmp2\n\t" 4919 "movd $tmp2,$src1\n\t" 4920 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4921 "movd $dst,$tmp2\t! add reduction4I" %} 4922 ins_encode %{ 4923 int vector_len = 0; 4924 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4925 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4926 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4927 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4928 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4929 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4930 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4931 %} 4932 ins_pipe( pipe_slow ); 4933 %} 4934 4935 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4936 predicate(VM_Version::supports_avxonly()); 4937 match(Set dst (AddReductionVI src1 src2)); 4938 effect(TEMP tmp, TEMP tmp2); 4939 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4940 "vphaddd $tmp,$tmp,$tmp2\n\t" 4941 "vextracti128_high $tmp2,$tmp\n\t" 4942 "vpaddd $tmp,$tmp,$tmp2\n\t" 4943 "movd $tmp2,$src1\n\t" 4944 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4945 "movd $dst,$tmp2\t! add reduction8I" %} 4946 ins_encode %{ 4947 int vector_len = 1; 4948 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4949 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4950 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4951 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4952 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4953 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4954 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4955 %} 4956 ins_pipe( pipe_slow ); 4957 %} 4958 4959 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4960 predicate(UseAVX > 2); 4961 match(Set dst (AddReductionVI src1 src2)); 4962 effect(TEMP tmp, TEMP tmp2); 4963 format %{ "vextracti128_high $tmp,$src2\n\t" 4964 "vpaddd $tmp,$tmp,$src2\n\t" 4965 "pshufd $tmp2,$tmp,0xE\n\t" 4966 "vpaddd $tmp,$tmp,$tmp2\n\t" 4967 "pshufd $tmp2,$tmp,0x1\n\t" 4968 "vpaddd $tmp,$tmp,$tmp2\n\t" 4969 "movd $tmp2,$src1\n\t" 4970 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4971 "movd $dst,$tmp2\t! 
add reduction8I" %} 4972 ins_encode %{ 4973 int vector_len = 0; 4974 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4975 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4976 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4977 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4978 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4979 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4980 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4981 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4982 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4983 %} 4984 ins_pipe( pipe_slow ); 4985 %} 4986 4987 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4988 predicate(UseAVX > 2); 4989 match(Set dst (AddReductionVI src1 src2)); 4990 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4991 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4992 "vpaddd $tmp3,$tmp3,$src2\n\t" 4993 "vextracti128_high $tmp,$tmp3\n\t" 4994 "vpaddd $tmp,$tmp,$tmp3\n\t" 4995 "pshufd $tmp2,$tmp,0xE\n\t" 4996 "vpaddd $tmp,$tmp,$tmp2\n\t" 4997 "pshufd $tmp2,$tmp,0x1\n\t" 4998 "vpaddd $tmp,$tmp,$tmp2\n\t" 4999 "movd $tmp2,$src1\n\t" 5000 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5001 "movd $dst,$tmp2\t! mul reduction16I" %} 5002 ins_encode %{ 5003 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5004 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5005 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5006 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5007 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5008 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5009 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5010 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5011 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5012 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5013 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5014 %} 5015 ins_pipe( pipe_slow ); 5016 %} 5017 5018 #ifdef _LP64 5019 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5020 predicate(UseAVX > 2); 5021 match(Set dst (AddReductionVL src1 src2)); 5022 effect(TEMP tmp, TEMP tmp2); 5023 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5024 "vpaddq $tmp,$src2,$tmp2\n\t" 5025 "movdq $tmp2,$src1\n\t" 5026 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5027 "movdq $dst,$tmp2\t! add reduction2L" %} 5028 ins_encode %{ 5029 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5030 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5031 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5032 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5033 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5034 %} 5035 ins_pipe( pipe_slow ); 5036 %} 5037 5038 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5039 predicate(UseAVX > 2); 5040 match(Set dst (AddReductionVL src1 src2)); 5041 effect(TEMP tmp, TEMP tmp2); 5042 format %{ "vextracti128_high $tmp,$src2\n\t" 5043 "vpaddq $tmp2,$tmp,$src2\n\t" 5044 "pshufd $tmp,$tmp2,0xE\n\t" 5045 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5046 "movdq $tmp,$src1\n\t" 5047 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5048 "movdq $dst,$tmp2\t! 
add reduction4L" %} 5049 ins_encode %{ 5050 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5051 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5052 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5053 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5054 __ movdq($tmp$$XMMRegister, $src1$$Register); 5055 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5056 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5057 %} 5058 ins_pipe( pipe_slow ); 5059 %} 5060 5061 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5062 predicate(UseAVX > 2); 5063 match(Set dst (AddReductionVL src1 src2)); 5064 effect(TEMP tmp, TEMP tmp2); 5065 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5066 "vpaddq $tmp2,$tmp2,$src2\n\t" 5067 "vextracti128_high $tmp,$tmp2\n\t" 5068 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5069 "pshufd $tmp,$tmp2,0xE\n\t" 5070 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5071 "movdq $tmp,$src1\n\t" 5072 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5073 "movdq $dst,$tmp2\t! add reduction8L" %} 5074 ins_encode %{ 5075 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5076 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5077 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5078 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5079 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5080 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5081 __ movdq($tmp$$XMMRegister, $src1$$Register); 5082 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5083 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5084 %} 5085 ins_pipe( pipe_slow ); 5086 %} 5087 #endif 5088 5089 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5090 predicate(UseSSE >= 1 && UseAVX == 0); 5091 match(Set dst (AddReductionVF dst src2)); 5092 effect(TEMP dst, TEMP tmp); 5093 format %{ "addss $dst,$src2\n\t" 5094 "pshufd $tmp,$src2,0x01\n\t" 5095 "addss $dst,$tmp\t! add reduction2F" %} 5096 ins_encode %{ 5097 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5098 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5099 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5100 %} 5101 ins_pipe( pipe_slow ); 5102 %} 5103 5104 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5105 predicate(UseAVX > 0); 5106 match(Set dst (AddReductionVF dst src2)); 5107 effect(TEMP dst, TEMP tmp); 5108 format %{ "vaddss $dst,$dst,$src2\n\t" 5109 "pshufd $tmp,$src2,0x01\n\t" 5110 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5111 ins_encode %{ 5112 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5113 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5114 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5115 %} 5116 ins_pipe( pipe_slow ); 5117 %} 5118 5119 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5120 predicate(UseSSE >= 1 && UseAVX == 0); 5121 match(Set dst (AddReductionVF dst src2)); 5122 effect(TEMP dst, TEMP tmp); 5123 format %{ "addss $dst,$src2\n\t" 5124 "pshufd $tmp,$src2,0x01\n\t" 5125 "addss $dst,$tmp\n\t" 5126 "pshufd $tmp,$src2,0x02\n\t" 5127 "addss $dst,$tmp\n\t" 5128 "pshufd $tmp,$src2,0x03\n\t" 5129 "addss $dst,$tmp\t! 
add reduction4F" %} 5130 ins_encode %{ 5131 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5132 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5133 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5134 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5135 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5136 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5137 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5138 %} 5139 ins_pipe( pipe_slow ); 5140 %} 5141 5142 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5143 predicate(UseAVX > 0); 5144 match(Set dst (AddReductionVF dst src2)); 5145 effect(TEMP tmp, TEMP dst); 5146 format %{ "vaddss $dst,dst,$src2\n\t" 5147 "pshufd $tmp,$src2,0x01\n\t" 5148 "vaddss $dst,$dst,$tmp\n\t" 5149 "pshufd $tmp,$src2,0x02\n\t" 5150 "vaddss $dst,$dst,$tmp\n\t" 5151 "pshufd $tmp,$src2,0x03\n\t" 5152 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5153 ins_encode %{ 5154 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5155 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5156 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5157 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5158 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5159 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5160 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5161 %} 5162 ins_pipe( pipe_slow ); 5163 %} 5164 5165 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5166 predicate(UseAVX > 0); 5167 match(Set dst (AddReductionVF dst src2)); 5168 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5169 format %{ "vaddss $dst,$dst,$src2\n\t" 5170 "pshufd $tmp,$src2,0x01\n\t" 5171 "vaddss $dst,$dst,$tmp\n\t" 5172 "pshufd $tmp,$src2,0x02\n\t" 5173 "vaddss $dst,$dst,$tmp\n\t" 5174 "pshufd $tmp,$src2,0x03\n\t" 5175 "vaddss $dst,$dst,$tmp\n\t" 5176 "vextractf128_high $tmp2,$src2\n\t" 5177 "vaddss $dst,$dst,$tmp2\n\t" 5178 "pshufd $tmp,$tmp2,0x01\n\t" 5179 "vaddss $dst,$dst,$tmp\n\t" 5180 "pshufd $tmp,$tmp2,0x02\n\t" 5181 "vaddss $dst,$dst,$tmp\n\t" 5182 "pshufd $tmp,$tmp2,0x03\n\t" 5183 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5184 ins_encode %{ 5185 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5186 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5187 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5188 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5189 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5190 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5191 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5192 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5193 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5194 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5195 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5196 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5197 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5198 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5199 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5200 %} 5201 ins_pipe( pipe_slow ); 5202 %} 5203 5204 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5205 predicate(UseAVX > 2); 5206 match(Set dst (AddReductionVF dst src2)); 5207 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5208 format %{ "vaddss $dst,$dst,$src2\n\t" 5209 "pshufd $tmp,$src2,0x01\n\t" 5210 "vaddss $dst,$dst,$tmp\n\t" 5211 "pshufd $tmp,$src2,0x02\n\t" 5212 "vaddss $dst,$dst,$tmp\n\t" 5213 "pshufd $tmp,$src2,0x03\n\t" 5214 "vaddss $dst,$dst,$tmp\n\t" 5215 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5216 "vaddss $dst,$dst,$tmp2\n\t" 5217 "pshufd $tmp,$tmp2,0x01\n\t" 5218 "vaddss $dst,$dst,$tmp\n\t" 5219 "pshufd $tmp,$tmp2,0x02\n\t" 5220 "vaddss $dst,$dst,$tmp\n\t" 5221 "pshufd $tmp,$tmp2,0x03\n\t" 5222 "vaddss $dst,$dst,$tmp\n\t" 5223 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5224 "vaddss $dst,$dst,$tmp2\n\t" 5225 "pshufd $tmp,$tmp2,0x01\n\t" 5226 "vaddss $dst,$dst,$tmp\n\t" 5227 "pshufd $tmp,$tmp2,0x02\n\t" 5228 "vaddss $dst,$dst,$tmp\n\t" 5229 "pshufd $tmp,$tmp2,0x03\n\t" 5230 "vaddss $dst,$dst,$tmp\n\t" 5231 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5232 "vaddss $dst,$dst,$tmp2\n\t" 5233 "pshufd $tmp,$tmp2,0x01\n\t" 5234 "vaddss $dst,$dst,$tmp\n\t" 5235 "pshufd $tmp,$tmp2,0x02\n\t" 5236 "vaddss $dst,$dst,$tmp\n\t" 5237 "pshufd $tmp,$tmp2,0x03\n\t" 5238 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5239 ins_encode %{ 5240 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5241 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5242 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5243 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5244 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5245 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5246 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5247 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5248 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5249 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5250 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5251 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5252 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5253 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5254 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5255 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5256 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5257 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5258 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5259 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5260 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5261 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5262 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5263 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5264 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5265 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5266 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5267 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5268 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5269 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5270 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5271 %} 5272 ins_pipe( pipe_slow ); 5273 %} 5274 5275 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5276 predicate(UseSSE >= 1 && UseAVX == 0); 5277 match(Set dst (AddReductionVD dst src2)); 5278 effect(TEMP tmp, TEMP dst); 5279 format %{ "addsd $dst,$src2\n\t" 5280 "pshufd $tmp,$src2,0xE\n\t" 5281 "addsd $dst,$tmp\t! add reduction2D" %} 5282 ins_encode %{ 5283 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5284 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5285 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5286 %} 5287 ins_pipe( pipe_slow ); 5288 %} 5289 5290 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5291 predicate(UseAVX > 0); 5292 match(Set dst (AddReductionVD dst src2)); 5293 effect(TEMP tmp, TEMP dst); 5294 format %{ "vaddsd $dst,$dst,$src2\n\t" 5295 "pshufd $tmp,$src2,0xE\n\t" 5296 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5297 ins_encode %{ 5298 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5299 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5300 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5301 %} 5302 ins_pipe( pipe_slow ); 5303 %} 5304 5305 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5306 predicate(UseAVX > 0); 5307 match(Set dst (AddReductionVD dst src2)); 5308 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5309 format %{ "vaddsd $dst,$dst,$src2\n\t" 5310 "pshufd $tmp,$src2,0xE\n\t" 5311 "vaddsd $dst,$dst,$tmp\n\t" 5312 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5313 "vaddsd $dst,$dst,$tmp2\n\t" 5314 "pshufd $tmp,$tmp2,0xE\n\t" 5315 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5316 ins_encode %{ 5317 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5318 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5319 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5320 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5321 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5322 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5323 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5324 %} 5325 ins_pipe( pipe_slow ); 5326 %} 5327 5328 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5329 predicate(UseAVX > 2); 5330 match(Set dst (AddReductionVD dst src2)); 5331 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5332 format %{ "vaddsd $dst,$dst,$src2\n\t" 5333 "pshufd $tmp,$src2,0xE\n\t" 5334 "vaddsd $dst,$dst,$tmp\n\t" 5335 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5336 "vaddsd $dst,$dst,$tmp2\n\t" 5337 "pshufd $tmp,$tmp2,0xE\n\t" 5338 "vaddsd $dst,$dst,$tmp\n\t" 5339 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5340 "vaddsd $dst,$dst,$tmp2\n\t" 5341 "pshufd $tmp,$tmp2,0xE\n\t" 5342 "vaddsd $dst,$dst,$tmp\n\t" 5343 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5344 "vaddsd $dst,$dst,$tmp2\n\t" 5345 "pshufd $tmp,$tmp2,0xE\n\t" 5346 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5347 ins_encode %{ 5348 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5349 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5350 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5351 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5352 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5353 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5354 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5355 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5356 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5357 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5358 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5359 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5360 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5361 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5362 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5363 %} 5364 ins_pipe( pipe_slow ); 5365 %} 5366 5367 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5368 predicate(UseSSE > 3 && UseAVX == 0); 5369 match(Set dst (MulReductionVI src1 src2)); 5370 effect(TEMP tmp, TEMP tmp2); 5371 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5372 "pmulld $tmp2,$src2\n\t" 5373 "movd $tmp,$src1\n\t" 5374 "pmulld $tmp2,$tmp\n\t" 5375 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5376 ins_encode %{ 5377 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5378 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5379 __ movdl($tmp$$XMMRegister, $src1$$Register); 5380 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5381 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5382 %} 5383 ins_pipe( pipe_slow ); 5384 %} 5385 5386 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5387 predicate(UseAVX > 0); 5388 match(Set dst (MulReductionVI src1 src2)); 5389 effect(TEMP tmp, TEMP tmp2); 5390 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5391 "vpmulld $tmp,$src2,$tmp2\n\t" 5392 "movd $tmp2,$src1\n\t" 5393 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5394 "movd $dst,$tmp2\t! mul reduction2I" %} 5395 ins_encode %{ 5396 int vector_len = 0; 5397 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5398 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5399 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5400 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5401 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5402 %} 5403 ins_pipe( pipe_slow ); 5404 %} 5405 5406 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5407 predicate(UseSSE > 3 && UseAVX == 0); 5408 match(Set dst (MulReductionVI src1 src2)); 5409 effect(TEMP tmp, TEMP tmp2); 5410 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5411 "pmulld $tmp2,$src2\n\t" 5412 "pshufd $tmp,$tmp2,0x1\n\t" 5413 "pmulld $tmp2,$tmp\n\t" 5414 "movd $tmp,$src1\n\t" 5415 "pmulld $tmp2,$tmp\n\t" 5416 "movd $dst,$tmp2\t! mul reduction4I" %} 5417 ins_encode %{ 5418 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5419 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5420 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5421 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5422 __ movdl($tmp$$XMMRegister, $src1$$Register); 5423 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5424 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5425 %} 5426 ins_pipe( pipe_slow ); 5427 %} 5428 5429 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5430 predicate(UseAVX > 0); 5431 match(Set dst (MulReductionVI src1 src2)); 5432 effect(TEMP tmp, TEMP tmp2); 5433 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5434 "vpmulld $tmp,$src2,$tmp2\n\t" 5435 "pshufd $tmp2,$tmp,0x1\n\t" 5436 "vpmulld $tmp,$tmp,$tmp2\n\t" 5437 "movd $tmp2,$src1\n\t" 5438 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5439 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5440 ins_encode %{ 5441 int vector_len = 0; 5442 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5443 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5444 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5445 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5446 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5447 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5448 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5449 %} 5450 ins_pipe( pipe_slow ); 5451 %} 5452 5453 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5454 predicate(UseAVX > 0); 5455 match(Set dst (MulReductionVI src1 src2)); 5456 effect(TEMP tmp, TEMP tmp2); 5457 format %{ "vextracti128_high $tmp,$src2\n\t" 5458 "vpmulld $tmp,$tmp,$src2\n\t" 5459 "pshufd $tmp2,$tmp,0xE\n\t" 5460 "vpmulld $tmp,$tmp,$tmp2\n\t" 5461 "pshufd $tmp2,$tmp,0x1\n\t" 5462 "vpmulld $tmp,$tmp,$tmp2\n\t" 5463 "movd $tmp2,$src1\n\t" 5464 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5465 "movd $dst,$tmp2\t! mul reduction8I" %} 5466 ins_encode %{ 5467 int vector_len = 0; 5468 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5469 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5470 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5471 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5472 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5473 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5474 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5475 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5476 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5477 %} 5478 ins_pipe( pipe_slow ); 5479 %} 5480 5481 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5482 predicate(UseAVX > 2); 5483 match(Set dst (MulReductionVI src1 src2)); 5484 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5485 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5486 "vpmulld $tmp3,$tmp3,$src2\n\t" 5487 "vextracti128_high $tmp,$tmp3\n\t" 5488 "vpmulld $tmp,$tmp,$src2\n\t" 5489 "pshufd $tmp2,$tmp,0xE\n\t" 5490 "vpmulld $tmp,$tmp,$tmp2\n\t" 5491 "pshufd $tmp2,$tmp,0x1\n\t" 5492 "vpmulld $tmp,$tmp,$tmp2\n\t" 5493 "movd $tmp2,$src1\n\t" 5494 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5495 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5496 ins_encode %{ 5497 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5498 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5499 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5500 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5501 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5502 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5503 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5504 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5505 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5506 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5507 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5508 %} 5509 ins_pipe( pipe_slow ); 5510 %} 5511 5512 #ifdef _LP64 5513 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5514 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5515 match(Set dst (MulReductionVL src1 src2)); 5516 effect(TEMP tmp, TEMP tmp2); 5517 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5518 "vpmullq $tmp,$src2,$tmp2\n\t" 5519 "movdq $tmp2,$src1\n\t" 5520 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5521 "movdq $dst,$tmp2\t! mul reduction2L" %} 5522 ins_encode %{ 5523 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5524 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5525 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5526 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5527 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5528 %} 5529 ins_pipe( pipe_slow ); 5530 %} 5531 5532 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5533 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5534 match(Set dst (MulReductionVL src1 src2)); 5535 effect(TEMP tmp, TEMP tmp2); 5536 format %{ "vextracti128_high $tmp,$src2\n\t" 5537 "vpmullq $tmp2,$tmp,$src2\n\t" 5538 "pshufd $tmp,$tmp2,0xE\n\t" 5539 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5540 "movdq $tmp,$src1\n\t" 5541 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5542 "movdq $dst,$tmp2\t! mul reduction4L" %} 5543 ins_encode %{ 5544 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5545 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5546 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5547 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5548 __ movdq($tmp$$XMMRegister, $src1$$Register); 5549 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5550 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5551 %} 5552 ins_pipe( pipe_slow ); 5553 %} 5554 5555 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5556 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5557 match(Set dst (MulReductionVL src1 src2)); 5558 effect(TEMP tmp, TEMP tmp2); 5559 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5560 "vpmullq $tmp2,$tmp2,$src2\n\t" 5561 "vextracti128_high $tmp,$tmp2\n\t" 5562 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5563 "pshufd $tmp,$tmp2,0xE\n\t" 5564 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5565 "movdq $tmp,$src1\n\t" 5566 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5567 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5568 ins_encode %{ 5569 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5570 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5571 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5572 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5573 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5574 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5575 __ movdq($tmp$$XMMRegister, $src1$$Register); 5576 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5577 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5578 %} 5579 ins_pipe( pipe_slow ); 5580 %} 5581 #endif 5582 5583 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5584 predicate(UseSSE >= 1 && UseAVX == 0); 5585 match(Set dst (MulReductionVF dst src2)); 5586 effect(TEMP dst, TEMP tmp); 5587 format %{ "mulss $dst,$src2\n\t" 5588 "pshufd $tmp,$src2,0x01\n\t" 5589 "mulss $dst,$tmp\t! mul reduction2F" %} 5590 ins_encode %{ 5591 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5592 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5593 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5594 %} 5595 ins_pipe( pipe_slow ); 5596 %} 5597 5598 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5599 predicate(UseAVX > 0); 5600 match(Set dst (MulReductionVF dst src2)); 5601 effect(TEMP tmp, TEMP dst); 5602 format %{ "vmulss $dst,$dst,$src2\n\t" 5603 "pshufd $tmp,$src2,0x01\n\t" 5604 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5605 ins_encode %{ 5606 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5607 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5608 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5609 %} 5610 ins_pipe( pipe_slow ); 5611 %} 5612 5613 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5614 predicate(UseSSE >= 1 && UseAVX == 0); 5615 match(Set dst (MulReductionVF dst src2)); 5616 effect(TEMP dst, TEMP tmp); 5617 format %{ "mulss $dst,$src2\n\t" 5618 "pshufd $tmp,$src2,0x01\n\t" 5619 "mulss $dst,$tmp\n\t" 5620 "pshufd $tmp,$src2,0x02\n\t" 5621 "mulss $dst,$tmp\n\t" 5622 "pshufd $tmp,$src2,0x03\n\t" 5623 "mulss $dst,$tmp\t! mul reduction4F" %} 5624 ins_encode %{ 5625 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5626 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5627 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5628 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5629 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5630 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5631 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5632 %} 5633 ins_pipe( pipe_slow ); 5634 %} 5635 5636 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5637 predicate(UseAVX > 0); 5638 match(Set dst (MulReductionVF dst src2)); 5639 effect(TEMP tmp, TEMP dst); 5640 format %{ "vmulss $dst,$dst,$src2\n\t" 5641 "pshufd $tmp,$src2,0x01\n\t" 5642 "vmulss $dst,$dst,$tmp\n\t" 5643 "pshufd $tmp,$src2,0x02\n\t" 5644 "vmulss $dst,$dst,$tmp\n\t" 5645 "pshufd $tmp,$src2,0x03\n\t" 5646 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5647 ins_encode %{ 5648 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5649 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5650 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5651 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5652 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5653 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5654 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5655 %} 5656 ins_pipe( pipe_slow ); 5657 %} 5658 5659 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5660 predicate(UseAVX > 0); 5661 match(Set dst (MulReductionVF dst src2)); 5662 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5663 format %{ "vmulss $dst,$dst,$src2\n\t" 5664 "pshufd $tmp,$src2,0x01\n\t" 5665 "vmulss $dst,$dst,$tmp\n\t" 5666 "pshufd $tmp,$src2,0x02\n\t" 5667 "vmulss $dst,$dst,$tmp\n\t" 5668 "pshufd $tmp,$src2,0x03\n\t" 5669 "vmulss $dst,$dst,$tmp\n\t" 5670 "vextractf128_high $tmp2,$src2\n\t" 5671 "vmulss $dst,$dst,$tmp2\n\t" 5672 "pshufd $tmp,$tmp2,0x01\n\t" 5673 "vmulss $dst,$dst,$tmp\n\t" 5674 "pshufd $tmp,$tmp2,0x02\n\t" 5675 "vmulss $dst,$dst,$tmp\n\t" 5676 "pshufd $tmp,$tmp2,0x03\n\t" 5677 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5678 ins_encode %{ 5679 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5680 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5681 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5682 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5683 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5684 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5685 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5686 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5687 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5688 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5689 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5690 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5691 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5692 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5693 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5694 %} 5695 ins_pipe( pipe_slow ); 5696 %} 5697 5698 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5699 predicate(UseAVX > 2); 5700 match(Set dst (MulReductionVF dst src2)); 5701 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5702 format %{ "vmulss $dst,$dst,$src2\n\t" 5703 "pshufd $tmp,$src2,0x01\n\t" 5704 "vmulss $dst,$dst,$tmp\n\t" 5705 "pshufd $tmp,$src2,0x02\n\t" 5706 "vmulss $dst,$dst,$tmp\n\t" 5707 "pshufd $tmp,$src2,0x03\n\t" 5708 "vmulss $dst,$dst,$tmp\n\t" 5709 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5710 "vmulss $dst,$dst,$tmp2\n\t" 5711 "pshufd $tmp,$tmp2,0x01\n\t" 5712 "vmulss $dst,$dst,$tmp\n\t" 5713 "pshufd $tmp,$tmp2,0x02\n\t" 5714 "vmulss $dst,$dst,$tmp\n\t" 5715 "pshufd $tmp,$tmp2,0x03\n\t" 5716 "vmulss $dst,$dst,$tmp\n\t" 5717 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5718 "vmulss $dst,$dst,$tmp2\n\t" 5719 "pshufd $tmp,$tmp2,0x01\n\t" 5720 "vmulss $dst,$dst,$tmp\n\t" 5721 "pshufd $tmp,$tmp2,0x02\n\t" 5722 "vmulss $dst,$dst,$tmp\n\t" 5723 "pshufd $tmp,$tmp2,0x03\n\t" 5724 "vmulss $dst,$dst,$tmp\n\t" 5725 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5726 "vmulss $dst,$dst,$tmp2\n\t" 5727 "pshufd $tmp,$tmp2,0x01\n\t" 5728 "vmulss $dst,$dst,$tmp\n\t" 5729 "pshufd 
$tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd  $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd  $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
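
// Reduction of a vector AND down to a scalar. Each rule below folds the
// vector in halves (vextract*_high to split off the upper half, then
// pshufd/vpshufd to rotate the remaining lanes), ANDing the halves
// together, and finally ANDs in the scalar input src1. In effect, for the
// 2-lane int case, a rough C sketch of what gets emitted:
//
//   int and_reduction2I(int src1, int src2[2]) {
//     return src1 & src2[0] & src2[1];
//   }
//
// The rsand* rules are the two-operand SSE forms; the rvand* rules use the
// three-operand AVX forms on progressively narrower halves.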
instruct rsand2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pand   $tmp2,$src2\n\t"
            "movd   $tmp,$src1\n\t"
            "pand   $tmp2,$tmp\n\t"
            "movd   $dst,$tmp2\t! and reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pand($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsand4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pand   $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pand   $tmp2,$tmp\n\t"
            "movd   $tmp,$src1\n\t"
            "pand   $tmp2,$tmp\n\t"
            "movd   $dst,$tmp2\t! and reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pand($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvand8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpand   $tmp,$tmp,$src2\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpand   $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpand   $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpand   $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! and reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvand16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpand   $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpand   $tmp,$tmp,$tmp3\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpand   $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpand   $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpand   $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! and reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, 0);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, 0);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
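
// The long variants below follow the same halving scheme, but combine the
// folded vector with the scalar input through movdq/movq and return the
// result in an rRegL. Since rRegL only exists on 64-bit targets, the whole
// group sits under #ifdef _LP64.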
#ifdef _LP64
instruct rsand2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 2 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pand   $tmp2,$src2\n\t"
            "movdq  $tmp,$src1\n\t"
            "pand   $tmp2,$tmp\n\t"
            "movq   $dst,$tmp2\t! and reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pand($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvand4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpand   $tmp2,$tmp,$src2\n\t"
            "vpshufd $tmp,$tmp2,0xE\n\t"
            "vpand   $tmp2,$tmp2,$tmp\n\t"
            "movq    $tmp,$src1\n\t"
            "vpand   $tmp2,$tmp2,$tmp\n\t"
            "movq    $dst,$tmp2\t! and reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movq($tmp$$XMMRegister, $src1$$Register);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvand8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpandq  $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpandq  $tmp2,$tmp2,$tmp\n\t"
            "vpshufd $tmp,$tmp2,0xE\n\t"
            "vpandq  $tmp2,$tmp2,$tmp\n\t"
            "movdq   $tmp,$src1\n\t"
            "vpandq  $tmp2,$tmp2,$tmp\n\t"
            "movdq   $dst,$tmp2\t! and reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0);
    __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
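
// Most scalar operations below come in several variants, selected by
// predicate:
//  - a two-operand SSE form (UseAVX == 0) that overwrites dst,
//  - a "_reg_avx" three-operand form for VEX-only targets
//    (VM_Version::supports_avxonly()),
//  - a "_reg_evex" form for AVX-512 targets with the BW extension
//    (VM_Version::supports_avx512bw()),
//  - a "_reg_evex_special" fallback for AVX-512 targets without BW
//    (VM_Version::supports_avx512nobw()), which matches the
//    read-modify-write shape, e.g. (Set dst (AddVB dst src2)),
//  - "_mem" forms that fold the (LoadVector mem) operand directly into
//    the arithmetic instruction as a memory operand.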
// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
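
// 512-bit byte adds are only available with the BW extension: vpaddb with
// vector_len = 2 is an AVX512BW encoding, so there is no "_special"
// fallback at this width.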
instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
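
// Int and long adds need no BW-specific variants: vpaddd/vpaddq at
// vector_len = 2 are base AVX-512F encodings, so plain UseAVX > 2
// predicates suffice for the 512-bit rules below. The packed float and
// double adds are likewise VEX-encodable up to 256 bits (UseAVX > 0).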
// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
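
// The subtract rules mirror the add ladder above: the same SSE / AVX /
// EVEX / EVEX-without-BW / memory-operand variants, with psubb/vpsubb and
// friends in place of the add forms.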
sub packed4B" %} 7087 ins_encode %{ 7088 int vector_len = 0; 7089 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7090 %} 7091 ins_pipe( pipe_slow ); 7092 %} 7093 7094 instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{ 7095 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7096 match(Set dst (SubVB dst src2)); 7097 effect(TEMP src1); 7098 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 7099 ins_encode %{ 7100 int vector_len = 0; 7101 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7102 %} 7103 ins_pipe( pipe_slow ); 7104 %} 7105 7106 instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{ 7107 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7108 match(Set dst (SubVB src (LoadVector mem))); 7109 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 7110 ins_encode %{ 7111 int vector_len = 0; 7112 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7113 %} 7114 ins_pipe( pipe_slow ); 7115 %} 7116 7117 instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{ 7118 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7119 match(Set dst (SubVB src (LoadVector mem))); 7120 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 7121 ins_encode %{ 7122 int vector_len = 0; 7123 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7124 %} 7125 ins_pipe( pipe_slow ); 7126 %} 7127 7128 instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7129 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7130 match(Set dst (SubVB dst (LoadVector mem))); 7131 effect(TEMP src); 7132 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 7133 ins_encode %{ 7134 int vector_len = 0; 7135 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7136 %} 7137 ins_pipe( pipe_slow ); 7138 %} 7139 7140 instruct vsub8B(vecD dst, vecD src) %{ 7141 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7142 match(Set dst (SubVB dst src)); 7143 format %{ "psubb $dst,$src\t! sub packed8B" %} 7144 ins_encode %{ 7145 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 7146 %} 7147 ins_pipe( pipe_slow ); 7148 %} 7149 7150 instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7151 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7152 match(Set dst (SubVB src1 src2)); 7153 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 7154 ins_encode %{ 7155 int vector_len = 0; 7156 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7157 %} 7158 ins_pipe( pipe_slow ); 7159 %} 7160 7161 instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7162 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7163 match(Set dst (SubVB src1 src2)); 7164 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 7165 ins_encode %{ 7166 int vector_len = 0; 7167 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7168 %} 7169 ins_pipe( pipe_slow ); 7170 %} 7171 7172 instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7173 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7174 match(Set dst (SubVB dst src2)); 7175 effect(TEMP src1); 7176 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed8B" %} 7177 ins_encode %{ 7178 int vector_len = 0; 7179 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7180 %} 7181 ins_pipe( pipe_slow ); 7182 %} 7183 7184 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ 7185 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7186 match(Set dst (SubVB src (LoadVector mem))); 7187 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 7188 ins_encode %{ 7189 int vector_len = 0; 7190 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7191 %} 7192 ins_pipe( pipe_slow ); 7193 %} 7194 7195 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ 7196 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7197 match(Set dst (SubVB src (LoadVector mem))); 7198 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 7199 ins_encode %{ 7200 int vector_len = 0; 7201 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7202 %} 7203 ins_pipe( pipe_slow ); 7204 %} 7205 7206 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7207 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7208 match(Set dst (SubVB dst (LoadVector mem))); 7209 effect(TEMP src); 7210 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 7211 ins_encode %{ 7212 int vector_len = 0; 7213 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7214 %} 7215 ins_pipe( pipe_slow ); 7216 %} 7217 7218 instruct vsub16B(vecX dst, vecX src) %{ 7219 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 7220 match(Set dst (SubVB dst src)); 7221 format %{ "psubb $dst,$src\t! sub packed16B" %} 7222 ins_encode %{ 7223 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 7224 %} 7225 ins_pipe( pipe_slow ); 7226 %} 7227 7228 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7229 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 7230 match(Set dst (SubVB src1 src2)); 7231 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 7232 ins_encode %{ 7233 int vector_len = 0; 7234 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7235 %} 7236 ins_pipe( pipe_slow ); 7237 %} 7238 7239 instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7240 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7241 match(Set dst (SubVB src1 src2)); 7242 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 7243 ins_encode %{ 7244 int vector_len = 0; 7245 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7246 %} 7247 ins_pipe( pipe_slow ); 7248 %} 7249 7250 instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7251 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7252 match(Set dst (SubVB dst src2)); 7253 effect(TEMP src1); 7254 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 7255 ins_encode %{ 7256 int vector_len = 0; 7257 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7258 %} 7259 ins_pipe( pipe_slow ); 7260 %} 7261 7262 instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ 7263 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 7264 match(Set dst (SubVB src (LoadVector mem))); 7265 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed16B" %} 7266 ins_encode %{ 7267 int vector_len = 0; 7268 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7269 %} 7270 ins_pipe( pipe_slow ); 7271 %} 7272 7273 instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ 7274 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7275 match(Set dst (SubVB src (LoadVector mem))); 7276 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 7277 ins_encode %{ 7278 int vector_len = 0; 7279 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7280 %} 7281 ins_pipe( pipe_slow ); 7282 %} 7283 7284 instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7285 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7286 match(Set dst (SubVB dst (LoadVector mem))); 7287 effect(TEMP src); 7288 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 7289 ins_encode %{ 7290 int vector_len = 0; 7291 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7292 %} 7293 ins_pipe( pipe_slow ); 7294 %} 7295 7296 instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7297 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 7298 match(Set dst (SubVB src1 src2)); 7299 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 7300 ins_encode %{ 7301 int vector_len = 1; 7302 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7303 %} 7304 ins_pipe( pipe_slow ); 7305 %} 7306 7307 instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7308 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7309 match(Set dst (SubVB src1 src2)); 7310 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 7311 ins_encode %{ 7312 int vector_len = 1; 7313 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7314 %} 7315 ins_pipe( pipe_slow ); 7316 %} 7317 7318 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7319 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 7320 match(Set dst (SubVB dst src2)); 7321 effect(TEMP src1); 7322 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 7323 ins_encode %{ 7324 int vector_len = 1; 7325 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7326 %} 7327 ins_pipe( pipe_slow ); 7328 %} 7329 7330 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ 7331 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 7332 match(Set dst (SubVB src (LoadVector mem))); 7333 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7334 ins_encode %{ 7335 int vector_len = 1; 7336 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7337 %} 7338 ins_pipe( pipe_slow ); 7339 %} 7340 7341 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ 7342 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7343 match(Set dst (SubVB src (LoadVector mem))); 7344 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7345 ins_encode %{ 7346 int vector_len = 1; 7347 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7348 %} 7349 ins_pipe( pipe_slow ); 7350 %} 7351 7352 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7353 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 7354 match(Set dst (SubVB dst (LoadVector mem))); 7355 effect(TEMP src); 7356 format %{ "vpsubb $dst,$src,$mem\t! 

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
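// From 32-bit lanes up no *_evex_special variants are needed: dword and qword
// subtracts are covered by base AVX-512F, so plain UseAVX-level predicates
// suffice. In all of these encodings vector_len selects the operand width:
// 0 = 128-bit (xmm), 1 = 256-bit (ymm), 2 = 512-bit (zmm).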
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Byte vector mul
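// x86 has no byte-wise multiply (there is no pmullb), so the MulVB rules
// below synthesize one: sign-extend each byte lane to a word (pmovsxbw),
// multiply the words (pmullw keeps the low 16 bits of each product), mask
// every word down to its low byte with 0x00ff, then repack with packuswb.
// Since the words are pre-masked to 0..255, packuswb's unsigned saturation
// can never trigger and the low product bytes come through exactly.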

instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp2, vecS tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $tmp2,$src2\n\t"
           "pmullw $tmp,$tmp2\n\t"
           "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp,$tmp2\n\t"
           "packuswb $tmp,$tmp\n\t"
           "movss $dst,$tmp\t! mul packed4B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp2, vecD tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $tmp2,$src2\n\t"
           "pmullw $tmp,$tmp2\n\t"
           "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp,$tmp2\n\t"
           "packuswb $tmp,$tmp\n\t"
           "movsd $dst,$tmp\t! mul packed8B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp3, vecX tmp2, vecX tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP tmp3, TEMP tmp2, TEMP tmp);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $tmp2,$src2\n\t"
           "pmullw $tmp,$tmp2\n\t"
           "pshufd $tmp2,$src1\n\t"
           "pshufd $tmp3,$src2\n\t"
           "pmovsxbw $tmp2,$tmp2\n\t"
           "pmovsxbw $tmp3,$tmp3\n\t"
           "pmullw $tmp2,$tmp3\n\t"
           "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp,$tmp3\n\t"
           "pand $tmp2,$tmp3\n\t"
           "packuswb $tmp,$tmp2\n\t"
           "movdqu $dst,$tmp\t! mul packed16B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 238);
    __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 238);
    __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ pmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister);
    __ pmullw($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecY tmp2, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{"vpmovsxbw $tmp,$src1\n\t"
           "vpmovsxbw $tmp2,$src2\n\t"
           "vpmullw $tmp,$tmp2\n\t"
           "vmovdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t"
           "vpand $tmp,$tmp2\n\t"
           "vextracti128_high $tmp2,$tmp\n\t"
           "vpackuswb $dst,$tmp, $tmp2\t! mul packed16B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vmovdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, vecY tmp3) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
  format %{"vextracti128_high $tmp1,$src1\n\t"
           "vextracti128_high $tmp3,$src2\n\t"
           "vpmovsxbw $tmp1,$tmp1\n\t"
           "vpmovsxbw $tmp3,$tmp3\n\t"
           "vpmullw $tmp1,$tmp1,$tmp3\n\t"
           "vpmovsxbw $tmp2,$src1\n\t"
           "vpmovsxbw $tmp3,$src2\n\t"
           "vpmullw $tmp2,$tmp2,$tmp3\n\t"
           "vmovdqu $tmp3, [0x00ff00ff0x00ff00ff]\n\t"
           "vpbroadcastd $tmp3, $tmp3\n\t"
           "vpand $tmp2,$tmp2,$tmp3\n\t"
           "vpand $tmp1,$tmp1,$tmp3\n\t"
           "vpackuswb $dst,$tmp2,$tmp1\n\t"
           "vpermq $dst, $dst, 0xD8\t! mul packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ vextracti128_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector mul
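// pmullw/vpmullw return only the low 16 bits of each 32-bit product, which is
// the correct wrap-around result for short and char alike: the low half of a
// product is the same whether its inputs are read as signed or unsigned.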
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
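// pmulld (packed dword multiply, low 32 bits) arrived with SSE4.1, hence the
// UseSSE > 3 guard on the pure-SSE forms below.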
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long vector mul
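// Before AVX-512DQ (vpmullq) there is no packed 64x64->64 multiply, so the
// SSE/AVX forms below build one from 32-bit pieces. Writing each lane as
// a = a_hi*2^32 + a_lo and b = b_hi*2^32 + b_lo, the low 64 bits of a*b are
//
//   a*b mod 2^64 = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)
//
// pmuludq supplies the a_lo*b_lo term, while the pshufd/pmulld/phaddd
// sequence collects the two cross terms, which psllq shifts into the high
// dword before the final paddq.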

instruct mul2L_reg(vecX dst, vecX src2, vecX tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && VM_Version::supports_sse4_1());
  match(Set dst (MulVL dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "pshufd $tmp,$src2, 177\n\t"
            "pmulld $tmp,$dst\n\t"
            "phaddd $tmp,$tmp\n\t"
            "pmovzxdq $tmp,$tmp\n\t"
            "psllq $tmp, 32\n\t"
            "pmuludq $dst,$src2\n\t"
            "paddq $dst,$tmp\n\t! mul packed2L" %}

  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177);
    __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ psllq($tmp$$XMMRegister, 32);
    __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && VM_Version::supports_avx());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp, vecY tmp1) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && VM_Version::supports_avx2());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "vcmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "vblendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "vcmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
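// The two CMoveV rules above implement a vector conditional move as a
// compare followed by a variable blend: vcmpps/vcmppd write an all-ones or
// all-zeros mask per element, and vblendvps/vblendvpd then select the
// second source wherever the mask element's sign bit is set. $dst is a
// TEMP because it carries the mask between the two instructions. The rules
// are limited to UseAVX < 3, presumably because an AVX-512 implementation
// would use opmask registers instead of an xmm/ymm mask.
//
// The DIV section below has rules only for float and double vectors:
// SSE/AVX provide no packed integer divide instruction.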
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
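// The packed divides map 1:1 onto divps/vdivps (and divpd/vdivpd below),
// which are IEEE-754 correctly rounded, matching Java's semantics for /
// on float and double. Note that the 16F rules above require UseAVX > 2,
// consistent with every other 512-bit (vecZ) rule in this section.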
// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only the low 64 bits of the xmm register are used for the count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
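// Example (illustrative): for a loop such as
//   for (int i = 0; i < a.length; i++) { a[i] = a[i] << n; }
// the superword transform emits a single LShiftCntV to move n into an xmm
// register via the rule above, and that one count register then feeds all
// of the packed shift instructions in the vectorized loop body.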
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
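// SqrtVD maps 1:1 onto vsqrtpd (and SqrtVF below onto vsqrtps) since the
// hardware square root is IEEE-754 correctly rounded, which matches the
// result Math.sqrt requires; as elsewhere, the _mem forms fold the vector
// load into the instruction's memory operand.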
instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
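// The 2S rules above illustrate the predicate tiers used for all of the
// short/char shift rules in this section:
//   supports_avxonly()    - AVX1/AVX2 hardware without AVX-512; VEX-encoded
//                           vpsllw with a non-destructive source.
//   supports_avx512bw()   - AVX-512 with the BW extension, which adds EVEX
//                           encodings of the 16-bit packed shifts.
//   supports_avx512nobw() - AVX-512 without BW (e.g. Knights Landing),
//                           where vpsllw has no EVEX form; the _special
//                           rules fall back to the destructive shape (note
//                           the "dst shift" match and the TEMP src).
// Plain dword/qword shifts need no such split; see the note ahead of the
// integer shift rules below.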
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
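// Unlike the 16-bit shifts above, pslld/psllq have EVEX encodings already
// in base AVX-512F (with AVX-512VL for the sub-512-bit forms), so the
// int/long shift rules below need no avx512bw/avx512nobw special-casing.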
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value to an int
// with sign extension before shifting. Char vectors are fine, though,
// since chars are unsigned values.
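// Worked example of the problem: for short s = -1 and a shift of 2, Java
// evaluates (short)(s >>> 2) by first widening s to the int 0xFFFFFFFF,
// shifting to get 0x3FFFFFFF, and narrowing back to 0xFFFF, i.e. -1 again.
// A packed 16-bit psrlw would instead compute 0xFFFF >>> 2 = 0x3FFF. For
// char data the widening is a zero extension, so both routes agree and the
// URShiftVS rules below remain usable for chars.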
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
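
// Illustrative note: in the register forms the count is taken from the low
// 64 bits of the shift operand and applied uniformly to every lane; none of
// these rules perform per-lane variable shifts. A scalar model of one psrlq
// lane under that assumption:
//
//   static uint64_t psrlq_lane(uint64_t x, uint64_t count) {
//     return (count > 63) ? 0ull : (x >> count);
//   }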

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
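
// Illustrative note: arithmetic shifts saturate the count rather than zeroing
// the lane, so an oversized count leaves every bit equal to the sign bit. A
// scalar C++ model of one psraw lane, assuming the documented behavior:
//
//   static int16_t psraw_lane(int16_t x, uint64_t count) {
//     if (count > 15) count = 15;      // count saturates at element bits - 1
//     return (int16_t)(x >> count);    // >> assumed to sign-extend here
//   }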

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
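
// Illustrative note on the variant naming used in this section (the meanings
// of the supports_*() tests are assumptions inferred from the predicates, not
// definitions from this file): the _avx, _evex and _evex_special flavors are
// guarded by mutually exclusive predicates so exactly one rule can match on a
// given CPU. The _evex_special forms, for EVEX parts without AVX512BW word
// instructions, match dst as both input and output and mark src as TEMP
// instead of taking a separate source.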

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
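
// Illustrative note: with no packed arithmetic right shift for longs, a
// 64-bit lane would have to be emulated. A hypothetical scalar sketch built
// from a logical shift plus a sign mask (not a sequence emitted here):
//
//   static int64_t sra64_lane(int64_t x, unsigned count) {  // 0 <= count <= 63
//     uint64_t logical = (uint64_t)x >> count;
//     uint64_t sign    = (x < 0 && count != 0) ? (~0ull << (64 - count)) : 0;
//     return (int64_t)(logical | sign);
//   }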

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
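
// Illustrative note: pand/por/pxor operate on raw bits, so one rule per
// vector byte size covers every element type; that is why the AND/OR/XOR
// sections key on length_in_bytes() instead of element count. A scalar model:
//
//   static void pand_bytes(uint8_t* dst, const uint8_t* src, int n) {
//     for (int i = 0; i < n; i++) dst[i] &= src[i];   // n = 4/8/16/32/64
//   }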

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Fto2D_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (ConvertVF2VD src));
  format %{ "vcvtps2pd $dst,$src\t! convert 2F to 2D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Fto4D_reg(vecY dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (ConvertVF2VD src));
  format %{ "vcvtps2pd $dst,$src\t! convert 4F to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
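
// Illustrative note: vcvtps2pd widens, so converting N floats yields N
// doubles in a destination twice as wide (2F in a vecD becomes 2D in a vecX,
// 4F in a vecX becomes 4D in a vecY, and so on). A scalar model of the lane
// conversion:
//
//   static void cvtps2pd_lanes(double* dst, const float* src, int n) {
//     for (int i = 0; i < n; i++) dst[i] = (double)src[i];
//   }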

instruct vcvt8Fto4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (ConvertVF2VD src));
  format %{ "vcvtps2pd $dst,$src\t! convert 8F to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Fto8D_reg(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (ConvertVF2VD src));
  format %{ "evcvtps2pd $dst,$src\t! convert 8F to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
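
// Illustrative note: the AVX vcmpps rules above write the comparison result
// directly into the destination as an all-ones or all-zeros lane. A scalar
// model of one EQ_OQ lane (the ordered predicate makes NaN compare false):
//
//   static uint32_t cmpeqps_lane(float a, float b) {
//     return (a == b) ? 0xFFFFFFFFu : 0u;   // false if either input is NaN
//   }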

instruct vcmpeq16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpeqps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
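
// Illustrative note: at 512 bits the EVEX compare writes a k mask register
// rather than a vector, so the 16F rules (vcmpeq16F above, vcmplt16F below,
// and the rest) follow evcmpps with a zero-masked load of an all-bits-set
// constant to materialize the mask as lane values. Assumed semantics:
//
//   bool k[16];                               // written by evcmpps
//   for (int i = 0; i < 16; i++)
//     dst[i] = k[i] ? 0xFFFFFFFFu : 0u;       // evmovdqul ... {z}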

instruct vcmplt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpltps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgtps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgeps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
11790 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11791 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
11792 %}
11793 ins_pipe( pipe_slow );
11794 %}
11795
11796 instruct vcmpne2F(vecD dst, vecD src1, vecD src2) %{
11797 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
11798 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
11799 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
11800 match(Set dst (VectorMaskCmp src1 src2));
11801 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed2F" %}
11802 ins_encode %{
11803 int vector_len = 0;
11804 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
11805 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
11806 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11807 %}
11808 ins_pipe( pipe_slow );
11809 %}
11810
11811 instruct vcmpne4F(vecX dst, vecX src1, vecX src2) %{
11812 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
11813 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
11814 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
11815 match(Set dst (VectorMaskCmp src1 src2));
11816 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed4F" %}
11817 ins_encode %{
11818 int vector_len = 0;
11819 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
11820 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
11821 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11822 %}
11823 ins_pipe( pipe_slow );
11824 %}
11825
11826 instruct vcmpne8F(vecY dst, vecY src1, vecY src2) %{
11827 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
11828 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
11829 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
11830 match(Set dst (VectorMaskCmp src1 src2));
11831 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed8F" %}
11832 ins_encode %{
11833 int vector_len = 1;
11834 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
11835 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
11836 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11837 %}
11838 ins_pipe( pipe_slow );
11839 %}
11840
11841 instruct vcmpne16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
11842 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
11843 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
11844 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
11845 match(Set dst (VectorMaskCmp src1 src2));
11846 effect(TEMP dst, TEMP scratch);
11847 format %{ "vcmpneps k2,$src1,$src2\n\t"
11848 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed16F" %}
11849 ins_encode %{
11850 int vector_len = 2;
11851 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
11852 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
11853 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
11854 KRegister mask = k0; // The comparison itself is not being masked.
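// Predicate-suffix naming follows the Intel convention: O/U selects the
// ordered/unordered result when an operand is NaN, and Q/S selects the
// quiet/signaling treatment of quiet-NaN operands; hence NEQ_UQ here
// rather than an ordered NEQ form.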
11855 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11856 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
11857 %}
11858 ins_pipe( pipe_slow );
11859 %}
11860
11861 instruct vcmpeq1D(vecD dst, vecD src1, vecD src2) %{
11862 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
11863 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
11864 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11865 match(Set dst (VectorMaskCmp src1 src2));
11866 format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed1D" %}
11867 ins_encode %{
11868 int vector_len = 0;
11869 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
11870 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11871 %}
11872 ins_pipe( pipe_slow );
11873 %}
11874
11875 instruct vcmpeq2D(vecX dst, vecX src1, vecX src2) %{
11876 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
11877 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
11878 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11879 match(Set dst (VectorMaskCmp src1 src2));
11880 format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed2D" %}
11881 ins_encode %{
11882 int vector_len = 0;
11883 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
11884 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11885 %}
11886 ins_pipe( pipe_slow );
11887 %}
11888
11889 instruct vcmpeq4D(vecY dst, vecY src1, vecY src2) %{
11890 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
11891 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
11892 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11893 match(Set dst (VectorMaskCmp src1 src2));
11894 format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed4D" %}
11895 ins_encode %{
11896 int vector_len = 1;
11897 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
11898 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11899 %}
11900 ins_pipe( pipe_slow );
11901 %}
11902
11903 instruct vcmpeq8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
11904 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
11905 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
11906 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11907 match(Set dst (VectorMaskCmp src1 src2));
11908 effect(TEMP dst, TEMP scratch);
11909 format %{ "vcmpeqpd k2,$src1,$src2\n\t"
11910 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8D" %}
11911 ins_encode %{
11912 int vector_len = 2;
11913 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
11914 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
11915 KRegister mask = k0; // The comparison itself is not being masked.
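// Throughout these rules vector_len selects the encoded width for the
// assembler helpers: 0 = 128-bit (XMM), 1 = 256-bit (YMM), 2 = 512-bit
// (ZMM); the 512-bit forms are why the vecZ rules require UseAVX > 2.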
11916 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11917 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
11918 %}
11919 ins_pipe( pipe_slow );
11920 %}
11921
11922 instruct vcmplt1D(vecD dst, vecD src1, vecD src2) %{
11923 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
11924 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
11925 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11926 match(Set dst (VectorMaskCmp src1 src2));
11927 format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed1D" %}
11928 ins_encode %{
11929 int vector_len = 0;
11930 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
11931 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11932 %}
11933 ins_pipe( pipe_slow );
11934 %}
11935
11936 instruct vcmplt2D(vecX dst, vecX src1, vecX src2) %{
11937 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
11938 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
11939 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11940 match(Set dst (VectorMaskCmp src1 src2));
11941 format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed2D" %}
11942 ins_encode %{
11943 int vector_len = 0;
11944 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
11945 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11946 %}
11947 ins_pipe( pipe_slow );
11948 %}
11949
11950 instruct vcmplt4D(vecY dst, vecY src1, vecY src2) %{
11951 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
11952 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
11953 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11954 match(Set dst (VectorMaskCmp src1 src2));
11955 format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed4D" %}
11956 ins_encode %{
11957 int vector_len = 1;
11958 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
11959 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11960 %}
11961 ins_pipe( pipe_slow );
11962 %}
11963
11964 instruct vcmplt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
11965 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
11966 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
11967 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11968 match(Set dst (VectorMaskCmp src1 src2));
11969 effect(TEMP dst, TEMP scratch);
11970 format %{ "vcmpltpd k2,$src1,$src2\n\t"
11971 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8D" %}
11972 ins_encode %{
11973 int vector_len = 2;
11974 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
11975 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
11976 KRegister mask = k0; // The comparison itself is not being masked.
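// The detour through a k register is unavoidable at this width: the VEX
// vcmpps/vcmppd forms used by the narrower rules write their mask straight
// into an XMM/YMM destination, but the EVEX-encoded 512-bit compares can
// only target a mask register, so the result has to be expanded back into
// a vector with the masked move.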
11977 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11978 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
11979 %}
11980 ins_pipe( pipe_slow );
11981 %}
11982
11983 instruct vcmpgt1D(vecD dst, vecD src1, vecD src2) %{
11984 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
11985 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
11986 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
11987 match(Set dst (VectorMaskCmp src1 src2));
11988 format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed1D" %}
11989 ins_encode %{
11990 int vector_len = 0;
11991 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
11992 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
11993 %}
11994 ins_pipe( pipe_slow );
11995 %}
11996
11997 instruct vcmpgt2D(vecX dst, vecX src1, vecX src2) %{
11998 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
11999 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12000 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12001 match(Set dst (VectorMaskCmp src1 src2));
12002 format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed2D" %}
12003 ins_encode %{
12004 int vector_len = 0;
12005 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
12006 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12007 %}
12008 ins_pipe( pipe_slow );
12009 %}
12010
12011 instruct vcmpgt4D(vecY dst, vecY src1, vecY src2) %{
12012 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
12013 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12014 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12015 match(Set dst (VectorMaskCmp src1 src2));
12016 format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed4D" %}
12017 ins_encode %{
12018 int vector_len = 1;
12019 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
12020 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12021 %}
12022 ins_pipe( pipe_slow );
12023 %}
12024
12025 instruct vcmpgt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
12026 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
12027 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12028 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12029 match(Set dst (VectorMaskCmp src1 src2));
12030 effect(TEMP dst, TEMP scratch);
12031 format %{ "vcmpgtpd k2,$src1,$src2\n\t"
12032 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8D" %}
12033 ins_encode %{
12034 int vector_len = 2;
12035 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
12036 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
12037 KRegister mask = k0; // The comparison itself is not being masked.
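// The relational rules (lt/gt/ge/le) all use ordered, quiet (_OQ)
// predicates: Java's <, <=, > and >= are false whenever an operand is NaN,
// which is exactly the ordered behaviour. For this rule, per 64-bit lane
// (scalar sketch):
//   dst[i] = (src1[i] > src2[i]) ? -1L : 0L;  // false (0) if either is NaN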
12038 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12039 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
12040 %}
12041 ins_pipe( pipe_slow );
12042 %}
12043
12044 instruct vcmpge1D(vecD dst, vecD src1, vecD src2) %{
12045 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
12046 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
12047 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12048 match(Set dst (VectorMaskCmp src1 src2));
12049 format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed1D" %}
12050 ins_encode %{
12051 int vector_len = 0;
12052 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
12053 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12054 %}
12055 ins_pipe( pipe_slow );
12056 %}
12057
12058 instruct vcmpge2D(vecX dst, vecX src1, vecX src2) %{
12059 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
12060 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
12061 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12062 match(Set dst (VectorMaskCmp src1 src2));
12063 format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed2D" %}
12064 ins_encode %{
12065 int vector_len = 0;
12066 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
12067 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12068 %}
12069 ins_pipe( pipe_slow );
12070 %}
12071
12072 instruct vcmpge4D(vecY dst, vecY src1, vecY src2) %{
12073 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
12074 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
12075 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12076 match(Set dst (VectorMaskCmp src1 src2));
12077 format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed4D" %}
12078 ins_encode %{
12079 int vector_len = 1;
12080 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
12081 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12082 %}
12083 ins_pipe( pipe_slow );
12084 %}
12085
12086 instruct vcmpge8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
12087 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
12088 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
12089 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12090 match(Set dst (VectorMaskCmp src1 src2));
12091 effect(TEMP dst, TEMP scratch);
12092 format %{ "vcmpgepd k2,$src1,$src2\n\t"
12093 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8D" %}
12094 ins_encode %{
12095 int vector_len = 2;
12096 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
12097 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
12098 KRegister mask = k0; // The comparison itself is not being masked.
12099 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12100 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
12101 %}
12102 ins_pipe( pipe_slow );
12103 %}
12104
12105 instruct vcmple1D(vecD dst, vecD src1, vecD src2) %{
12106 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
12107 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
12108 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12109 match(Set dst (VectorMaskCmp src1 src2));
12110 format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed1D" %}
12111 ins_encode %{
12112 int vector_len = 0;
12113 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
12114 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12115 %}
12116 ins_pipe( pipe_slow );
12117 %}
12118
12119 instruct vcmple2D(vecX dst, vecX src1, vecX src2) %{
12120 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
12121 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
12122 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12123 match(Set dst (VectorMaskCmp src1 src2));
12124 format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed2D" %}
12125 ins_encode %{
12126 int vector_len = 0;
12127 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
12128 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12129 %}
12130 ins_pipe( pipe_slow );
12131 %}
12132
12133 instruct vcmple4D(vecY dst, vecY src1, vecY src2) %{
12134 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
12135 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
12136 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12137 match(Set dst (VectorMaskCmp src1 src2));
12138 format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed4D" %}
12139 ins_encode %{
12140 int vector_len = 1;
12141 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
12142 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12143 %}
12144 ins_pipe( pipe_slow );
12145 %}
12146
12147 instruct vcmple8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
12148 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
12149 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
12150 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12151 match(Set dst (VectorMaskCmp src1 src2));
12152 effect(TEMP dst, TEMP scratch);
12153 format %{ "vcmplepd k2,$src1,$src2\n\t"
12154 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8D" %}
12155 ins_encode %{
12156 int vector_len = 2;
12157 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
12158 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
12159 KRegister mask = k0; // The comparison itself is not being masked.
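// ExternalAddress(vector_all_bits_set()) is the address of an all-ones
// constant. Assuming the boolean argument of this evmovdqul overload
// selects merge- vs. zero-masking (consistent with the {z} shown in the
// format string), 'false' requests zero-masking, and the scratch register
// lets the helper materialize the constant's address when it is not
// RIP-reachable.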
12160 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12161 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
12162 %}
12163 ins_pipe( pipe_slow );
12164 %}
12165
12166 instruct vcmpne1D(vecD dst, vecD src1, vecD src2) %{
12167 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
12168 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
12169 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12170 match(Set dst (VectorMaskCmp src1 src2));
12171 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed1D" %}
12172 ins_encode %{
12173 int vector_len = 0;
12174 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
12175 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
12176 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12177 %}
12178 ins_pipe( pipe_slow );
12179 %}
12180
12181 instruct vcmpne2D(vecX dst, vecX src1, vecX src2) %{
12182 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
12183 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
12184 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12185 match(Set dst (VectorMaskCmp src1 src2));
12186 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed2D" %}
12187 ins_encode %{
12188 int vector_len = 0;
12189 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
12190 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
12191 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12192 %}
12193 ins_pipe( pipe_slow );
12194 %}
12195
12196 instruct vcmpne4D(vecY dst, vecY src1, vecY src2) %{
12197 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
12198 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
12199 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12200 match(Set dst (VectorMaskCmp src1 src2));
12201 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed4D" %}
12202 ins_encode %{
12203 int vector_len = 1;
12204 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
12205 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
12206 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12207 %}
12208 ins_pipe( pipe_slow );
12209 %}
12210
12211 instruct vcmpne8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
12212 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
12213 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
12214 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
12215 match(Set dst (VectorMaskCmp src1 src2));
12216 effect(TEMP dst, TEMP scratch);
12217 format %{ "vcmpnepd k2,$src1,$src2\n\t"
12218 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed8D" %}
12219 ins_encode %{
12220 int vector_len = 2;
12221 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
12222 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
12223 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
12224 KRegister mask = k0; // The comparison itself is not being masked.
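// effect(TEMP dst, TEMP scratch) marks both operands as temporaries that
// are written while the inputs are still live, so the register allocator
// will not assign them registers that alias src1 or src2 across this
// two-instruction sequence.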
12225 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12226 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12227 %} 12228 ins_pipe( pipe_slow ); 12229 %} 12230 12231 instruct vcmpeq2I(vecD dst, vecD src1, vecD src2) %{ 12232 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12233 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12234 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12235 match(Set dst (VectorMaskCmp src1 src2)); 12236 format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed2I" %} 12237 ins_encode %{ 12238 int vector_len = 0; 12239 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12240 %} 12241 ins_pipe( pipe_slow ); 12242 %} 12243 12244 instruct vcmpeq4I(vecX dst, vecX src1, vecX src2) %{ 12245 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12246 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12247 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12248 match(Set dst (VectorMaskCmp src1 src2)); 12249 format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed4I" %} 12250 ins_encode %{ 12251 int vector_len = 0; 12252 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12253 %} 12254 ins_pipe( pipe_slow ); 12255 %} 12256 12257 instruct vcmpeq8I(vecY dst, vecY src1, vecY src2) %{ 12258 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12259 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12260 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12261 match(Set dst (VectorMaskCmp src1 src2)); 12262 format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed8I" %} 12263 ins_encode %{ 12264 int vector_len = 1; 12265 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12266 %} 12267 ins_pipe( pipe_slow ); 12268 %} 12269 12270 instruct vcmpeq16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12271 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12272 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12273 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12274 match(Set dst (VectorMaskCmp src1 src2)); 12275 effect(TEMP dst, TEMP scratch); 12276 format %{ "vpcmpeqd k2,$src1,$src2\n\t" 12277 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16I" %} 12278 ins_encode %{ 12279 int vector_len = 2; 12280 Assembler::ComparisonPredicate cmp = Assembler::eq; 12281 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12282 KRegister mask = k0; // The comparison itself is not being masked. 12283 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12284 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12285 %} 12286 ins_pipe( pipe_slow ); 12287 %} 12288 12289 instruct vcmplt2I(vecD dst, vecD src1, vecD src2) %{ 12290 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12291 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12292 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12293 match(Set dst (VectorMaskCmp src1 src2)); 12294 format %{ "vpcmpgtd $dst,$src2,$src1\t! 
cmplt packed2I" %}
12295 ins_encode %{
12296 int vector_len = 0;
12297 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
12298 %}
12299 ins_pipe( pipe_slow );
12300 %}
12301
12302 instruct vcmplt4I(vecX dst, vecX src1, vecX src2) %{
12303 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
12304 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
12305 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
12306 match(Set dst (VectorMaskCmp src1 src2));
12307 format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed4I" %}
12308 ins_encode %{
12309 int vector_len = 0;
12310 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
12311 %}
12312 ins_pipe( pipe_slow );
12313 %}
12314
12315 instruct vcmplt8I(vecY dst, vecY src1, vecY src2) %{
12316 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
12317 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
12318 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
12319 match(Set dst (VectorMaskCmp src1 src2));
12320 format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed8I" %}
12321 ins_encode %{
12322 int vector_len = 1;
12323 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
12324 %}
12325 ins_pipe( pipe_slow );
12326 %}
12327
12328 instruct vcmplt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
12329 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
12330 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
12331 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
12332 match(Set dst (VectorMaskCmp src1 src2));
12333 effect(TEMP dst, TEMP scratch);
12334 format %{ "vpcmpltd k2,$src1,$src2\n\t"
12335 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16I" %}
12336 ins_encode %{
12337 int vector_len = 2;
12338 Assembler::ComparisonPredicate cmp = Assembler::lt;
12339 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
12340 KRegister mask = k0; // The comparison itself is not being masked.
12341 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12342 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
12343 %}
12344 ins_pipe( pipe_slow );
12345 %}
12346
12347 instruct vcmpgt2I(vecD dst, vecD src1, vecD src2) %{
12348 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
12349 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12350 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
12351 match(Set dst (VectorMaskCmp src1 src2));
12352 format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed2I" %}
12353 ins_encode %{
12354 int vector_len = 0;
12355 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
12356 %}
12357 ins_pipe( pipe_slow );
12358 %}
12359
12360 instruct vcmpgt4I(vecX dst, vecX src1, vecX src2) %{
12361 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
12362 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12363 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
12364 match(Set dst (VectorMaskCmp src1 src2));
12365 format %{ "vpcmpgtd $dst,$src1,$src2\t!
cmpgt packed4I" %} 12366 ins_encode %{ 12367 int vector_len = 0; 12368 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12369 %} 12370 ins_pipe( pipe_slow ); 12371 %} 12372 12373 instruct vcmpgt8I(vecY dst, vecY src1, vecY src2) %{ 12374 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12375 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12376 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12377 match(Set dst (VectorMaskCmp src1 src2)); 12378 format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed8I" %} 12379 ins_encode %{ 12380 int vector_len = 1; 12381 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12382 %} 12383 ins_pipe( pipe_slow ); 12384 %} 12385 12386 instruct vcmpgt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12387 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12388 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12389 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12390 match(Set dst (VectorMaskCmp src1 src2)); 12391 effect(TEMP dst, TEMP scratch); 12392 format %{ "vpcmpnled k2,$src1,$src2\n\t" 12393 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16I" %} 12394 ins_encode %{ 12395 int vector_len = 2; 12396 Assembler::ComparisonPredicate cmp = Assembler::nle; 12397 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12398 KRegister mask = k0; // The comparison itself is not being masked. 12399 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12400 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12401 %} 12402 ins_pipe( pipe_slow ); 12403 %} 12404 12405 instruct vcmpge2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12406 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12407 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12408 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12409 match(Set dst (VectorMaskCmp src1 src2)); 12410 effect(TEMP scratch); 12411 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 12412 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2I" %} 12413 ins_encode %{ 12414 int vector_len = 0; 12415 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12416 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12417 %} 12418 ins_pipe( pipe_slow ); 12419 %} 12420 12421 instruct vcmpge4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12422 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12423 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12424 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12425 match(Set dst (VectorMaskCmp src1 src2)); 12426 effect(TEMP scratch); 12427 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 12428 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed4I" %} 12429 ins_encode %{ 12430 int vector_len = 0; 12431 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12432 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12433 %} 12434 ins_pipe( pipe_slow ); 12435 %} 12436 12437 instruct vcmpge8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12438 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12439 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12440 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12441 match(Set dst (VectorMaskCmp src1 src2)); 12442 effect(TEMP scratch); 12443 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 12444 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8I" %} 12445 ins_encode %{ 12446 int vector_len = 1; 12447 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12448 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12449 %} 12450 ins_pipe( pipe_slow ); 12451 %} 12452 12453 instruct vcmpge16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12454 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12455 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12456 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12457 match(Set dst (VectorMaskCmp src1 src2)); 12458 effect(TEMP dst, TEMP scratch); 12459 format %{ "vpcmpnltd k2,$src1,$src2\n\t" 12460 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16I" %} 12461 ins_encode %{ 12462 int vector_len = 2; 12463 Assembler::ComparisonPredicate cmp = Assembler::nlt; 12464 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12465 KRegister mask = k0; // The comparison itself is not being masked. 12466 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12467 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12468 %} 12469 ins_pipe( pipe_slow ); 12470 %} 12471 12472 instruct vcmple2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12473 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12474 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12475 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12476 match(Set dst (VectorMaskCmp src1 src2)); 12477 effect(TEMP scratch); 12478 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 12479 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2I" %} 12480 ins_encode %{ 12481 int vector_len = 0; 12482 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12483 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12484 %} 12485 ins_pipe( pipe_slow ); 12486 %} 12487 12488 instruct vcmple4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12489 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12490 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12491 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12492 match(Set dst (VectorMaskCmp src1 src2)); 12493 effect(TEMP scratch); 12494 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 12495 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed4I" %} 12496 ins_encode %{ 12497 int vector_len = 0; 12498 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12499 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12500 %} 12501 ins_pipe( pipe_slow ); 12502 %} 12503 12504 instruct vcmple8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12505 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12506 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12507 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12508 match(Set dst (VectorMaskCmp src1 src2)); 12509 effect(TEMP scratch); 12510 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 12511 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8I" %} 12512 ins_encode %{ 12513 int vector_len = 1; 12514 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12515 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12516 %} 12517 ins_pipe( pipe_slow ); 12518 %} 12519 12520 instruct vcmple16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12521 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12522 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12523 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12524 match(Set dst (VectorMaskCmp src1 src2)); 12525 effect(TEMP dst, TEMP scratch); 12526 format %{ "vpcmpled k2,$src1,$src2\n\t" 12527 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16I" %} 12528 ins_encode %{ 12529 int vector_len = 2; 12530 Assembler::ComparisonPredicate cmp = Assembler::le; 12531 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12532 KRegister mask = k0; // The comparison itself is not being masked. 12533 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12534 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12535 %} 12536 ins_pipe( pipe_slow ); 12537 %} 12538 12539 instruct vcmpne2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12540 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12541 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12542 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12543 match(Set dst (VectorMaskCmp src1 src2)); 12544 effect(TEMP scratch); 12545 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 12546 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2I" %} 12547 ins_encode %{ 12548 int vector_len = 0; 12549 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12550 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12551 %} 12552 ins_pipe( pipe_slow ); 12553 %} 12554 12555 instruct vcmpne4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12556 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12557 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12558 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12559 match(Set dst (VectorMaskCmp src1 src2)); 12560 effect(TEMP scratch); 12561 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 12562 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed4I" %} 12563 ins_encode %{ 12564 int vector_len = 0; 12565 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12566 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12567 %} 12568 ins_pipe( pipe_slow ); 12569 %} 12570 12571 instruct vcmpne8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12572 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12573 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12574 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12575 match(Set dst (VectorMaskCmp src1 src2)); 12576 effect(TEMP scratch); 12577 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 12578 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8I" %} 12579 ins_encode %{ 12580 int vector_len = 1; 12581 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12582 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12583 %} 12584 ins_pipe( pipe_slow ); 12585 %} 12586 12587 instruct vcmpne16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12588 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12589 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12590 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12591 match(Set dst (VectorMaskCmp src1 src2)); 12592 effect(TEMP dst, TEMP scratch); 12593 format %{ "vpcmpneqd k2,$src1,$src2\n\t" 12594 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed16I" %} 12595 ins_encode %{ 12596 int vector_len = 2; 12597 Assembler::ComparisonPredicate cmp = Assembler::neq; 12598 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12599 KRegister mask = k0; // The comparison itself is not being masked. 12600 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12601 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12602 %} 12603 ins_pipe( pipe_slow ); 12604 %} 12605 12606 instruct vcmpeq8B(vecD dst, vecD src1, vecD src2) %{ 12607 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12608 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12609 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12610 match(Set dst (VectorMaskCmp src1 src2)); 12611 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed8B" %} 12612 ins_encode %{ 12613 int vector_len = 0; 12614 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12615 %} 12616 ins_pipe( pipe_slow ); 12617 %} 12618 12619 instruct vcmpeq16B(vecX dst, vecX src1, vecX src2) %{ 12620 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12621 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12622 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12623 match(Set dst (VectorMaskCmp src1 src2)); 12624 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! 
cmpeq packed16B" %} 12625 ins_encode %{ 12626 int vector_len = 0; 12627 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12628 %} 12629 ins_pipe( pipe_slow ); 12630 %} 12631 12632 instruct vcmpeq32B(vecY dst, vecY src1, vecY src2) %{ 12633 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12634 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12635 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12636 match(Set dst (VectorMaskCmp src1 src2)); 12637 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed32B" %} 12638 ins_encode %{ 12639 int vector_len = 1; 12640 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12641 %} 12642 ins_pipe( pipe_slow ); 12643 %} 12644 12645 instruct vcmpeq64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12646 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 12647 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12648 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12649 match(Set dst (VectorMaskCmp src1 src2)); 12650 effect(TEMP dst, TEMP scratch); 12651 format %{ "vpcmpeqb k2,$src1,$src2\n\t" 12652 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed64B" %} 12653 ins_encode %{ 12654 int vector_len = 2; 12655 Assembler::ComparisonPredicate cmp = Assembler::eq; 12656 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12657 KRegister mask = k0; // The comparison itself is not being masked. 12658 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12659 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12660 %} 12661 ins_pipe( pipe_slow ); 12662 %} 12663 12664 instruct vcmplt8B(vecD dst, vecD src1, vecD src2) %{ 12665 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12666 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12667 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12668 match(Set dst (VectorMaskCmp src1 src2)); 12669 format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed8B" %} 12670 ins_encode %{ 12671 int vector_len = 0; 12672 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12673 %} 12674 ins_pipe( pipe_slow ); 12675 %} 12676 12677 instruct vcmplt16B(vecX dst, vecX src1, vecX src2) %{ 12678 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12679 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12680 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12681 match(Set dst (VectorMaskCmp src1 src2)); 12682 format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed16B" %} 12683 ins_encode %{ 12684 int vector_len = 0; 12685 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12686 %} 12687 ins_pipe( pipe_slow ); 12688 %} 12689 12690 instruct vcmplt32B(vecY dst, vecY src1, vecY src2) %{ 12691 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12692 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12693 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12694 match(Set dst (VectorMaskCmp src1 src2)); 12695 format %{ "vpcmpgtb $dst,$src2,$src1\t! 
cmplt packed32B" %}
12696 ins_encode %{
12697 int vector_len = 1;
12698 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
12699 %}
12700 ins_pipe( pipe_slow );
12701 %}
12702
12703 instruct vcmplt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
12704 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
12705 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
12706 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
12707 match(Set dst (VectorMaskCmp src1 src2));
12708 effect(TEMP dst, TEMP scratch);
12709 format %{ "vpcmpltb k2,$src1,$src2\n\t"
12710 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed64B" %}
12711 ins_encode %{
12712 int vector_len = 2;
12713 Assembler::ComparisonPredicate cmp = Assembler::lt;
12714 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
12715 KRegister mask = k0; // The comparison itself is not being masked.
12716 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
12717 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
12718 %}
12719 ins_pipe( pipe_slow );
12720 %}
12721
12722 instruct vcmpgt8B(vecD dst, vecD src1, vecD src2) %{
12723 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
12724 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12725 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
12726 match(Set dst (VectorMaskCmp src1 src2));
12727 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed8B" %}
12728 ins_encode %{
12729 int vector_len = 0;
12730 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
12731 %}
12732 ins_pipe( pipe_slow );
12733 %}
12734
12735 instruct vcmpgt16B(vecX dst, vecX src1, vecX src2) %{
12736 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
12737 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12738 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
12739 match(Set dst (VectorMaskCmp src1 src2));
12740 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed16B" %}
12741 ins_encode %{
12742 int vector_len = 0;
12743 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
12744 %}
12745 ins_pipe( pipe_slow );
12746 %}
12747
12748 instruct vcmpgt32B(vecY dst, vecY src1, vecY src2) %{
12749 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
12750 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12751 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
12752 match(Set dst (VectorMaskCmp src1 src2));
12753 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed32B" %}
12754 ins_encode %{
12755 int vector_len = 1;
12756 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
12757 %}
12758 ins_pipe( pipe_slow );
12759 %}
12760
12761 instruct vcmpgt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
12762 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
12763 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
12764 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
12765 match(Set dst (VectorMaskCmp src1 src2));
12766 effect(TEMP dst, TEMP scratch);
12767 format %{ "vpcmpnleb k2,$src1,$src2\n\t"
12768 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t!
cmpgt packed64B" %} 12769 ins_encode %{ 12770 int vector_len = 2; 12771 Assembler::ComparisonPredicate cmp = Assembler::nle; 12772 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12773 KRegister mask = k0; // The comparison itself is not being masked. 12774 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12775 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12776 %} 12777 ins_pipe( pipe_slow ); 12778 %} 12779 12780 instruct vcmpge8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12781 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12782 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12783 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12784 match(Set dst (VectorMaskCmp src1 src2)); 12785 effect(TEMP scratch); 12786 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 12787 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8B" %} 12788 ins_encode %{ 12789 int vector_len = 0; 12790 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12791 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12792 %} 12793 ins_pipe( pipe_slow ); 12794 %} 12795 12796 instruct vcmpge16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12797 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12798 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12799 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12800 match(Set dst (VectorMaskCmp src1 src2)); 12801 effect(TEMP scratch); 12802 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 12803 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16B" %} 12804 ins_encode %{ 12805 int vector_len = 0; 12806 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12807 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12808 %} 12809 ins_pipe( pipe_slow ); 12810 %} 12811 12812 instruct vcmpge32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12813 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12814 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12815 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12816 match(Set dst (VectorMaskCmp src1 src2)); 12817 effect(TEMP scratch); 12818 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 12819 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed32B" %} 12820 ins_encode %{ 12821 int vector_len = 1; 12822 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12823 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12824 %} 12825 ins_pipe( pipe_slow ); 12826 %} 12827 12828 instruct vcmpge64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12829 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 12830 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12831 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12832 match(Set dst (VectorMaskCmp src1 src2)); 12833 effect(TEMP dst, TEMP scratch); 12834 format %{ "vpcmpnltb k2,$src1,$src2\n\t" 12835 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpge packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
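    // 512-bit compares write a k-register, not a vector, so the result is expanded
    // below: a zero-masking move loads all-ones into each lane whose mask bit is
    // set and clears the rest, matching the all-ones/all-zeros convention of the
    // narrower compares above.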
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnlew k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmplew k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
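    // ge maps onto the native EVEX "not less-than" (nlt) predicate, so no
    // all-ones inversion is needed, unlike the narrower cmpge rules above.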
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
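    // le is a native EVEX predicate; the narrower cmple rules above must instead
    // synthesize it as gt followed by an xor with all-ones.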
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
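    // neq is likewise native on EVEX, avoiding the eq + invert sequence used by
    // the 128/256-bit cmpneq rules.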
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvps2F(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvps $dst,$src,$mask\t! packed2F" %}
  ins_encode %{
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps2F(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvps4F(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvps $dst,$src,$mask\t! packed4F" %}
  ins_encode %{
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps4F(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps8F(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps16F(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t"
            "vblendmps $dst,k2,$src1,$src2\t! blend packed16F" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
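    // Recover a k-mask from the vector mask by comparing each lane against
    // all-ones; the masked blend then picks $src2 lanes where the bit is set
    // and $src1 lanes elsewhere.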
    __ evpcmpeqd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
    __ evblendmps($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd8D(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vblendmpd $dst,k2,$src1,$src2\t! blend packed8D" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evblendmpd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmb64B(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqb k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmb $dst,k2,$src1,$src2\t! blend packed64B" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpb(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmb($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmw32S(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT && VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqw k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmw $dst,k2,$src1,$src2\t! blend packed32S" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpw(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmw($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmd16I(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmd $dst,k2,$src1,$src2\t! blend packed16I" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
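    // Same k-mask recovery as the floating-point blends above, using the integer
    // compare and blend forms.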
    __ evpcmpd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmq8L(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmq $dst,k2,$src1,$src2\t! blend packed8L" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmq($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb2I(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed2I" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb2I(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4I(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed4I" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4I(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8I(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb8B(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed8B" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8B(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb16B(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed16B" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb16B(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb32B(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4S(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed4S" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4S(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb8S(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed8S" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8S(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb16S(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb1L(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed1L" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb1L(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb2L(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed2L" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb2L(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4L(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvpd1D(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvpd $dst,$src,$mask\t! packed1D" %}
  ins_encode %{
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd1D(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed1D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvpd2D(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvpd $dst,$src,$mask\t! packed2D" %}
  ins_encode %{
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd2D(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd4D(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed4D" %}
packed4D" %} 14133 ins_encode %{ 14134 int vector_len = 1; 14135 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 14136 %} 14137 ins_pipe( pipe_slow ); 14138 %} 14139 14140 // --------------------------------- NEG -------------------------------------- 14141 // a = -a 14142 instruct vneg2I_reg(vecD dst, vecD src) %{ 14143 predicate(UseSSE > 1 && n->as_Vector()->length() == 2); 14144 match(Set dst (NegVI src)); 14145 effect(TEMP dst); 14146 format %{ "pxor $dst,$dst\n\t" 14147 "psubd $dst, $src\t! neg packed2I" %} 14148 ins_cost(150); 14149 ins_encode %{ 14150 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 14151 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 14152 %} 14153 ins_pipe( pipe_slow ); 14154 %} 14155 14156 instruct vneg4I_reg(vecX dst, vecX src) %{ 14157 predicate(UseSSE > 1 && n->as_Vector()->length() == 4); 14158 match(Set dst (NegVI src)); 14159 effect(TEMP dst); 14160 format %{ "pxor $dst,$dst\n\t" 14161 "psubd $dst, $src\t! neg packed4I" %} 14162 ins_cost(150); 14163 ins_encode %{ 14164 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 14165 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 14166 %} 14167 ins_pipe( pipe_slow ); 14168 %} 14169 14170 instruct vneg8I_reg(vecY dst, vecY src, vecY tmp) %{ 14171 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 14172 match(Set dst (NegVI src)); 14173 effect(TEMP tmp); 14174 format %{ "vpxor $tmp,$tmp,$tmp\n\t" 14175 "vpsubd $dst,$tmp,$src\t! neg packed8I" %} 14176 ins_cost(150); 14177 ins_encode %{ 14178 int vector_len = 1; 14179 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 14180 __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 14181 %} 14182 ins_pipe( pipe_slow ); 14183 %} 14184 14185 instruct vneg16I_reg(vecZ dst, vecZ src, vecZ tmp) %{ 14186 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 14187 match(Set dst (NegVI src)); 14188 effect(TEMP tmp); 14189 format %{ "vpxor $tmp,$tmp,$tmp\n\t" 14190 "vpsubd $dst,$tmp,$src\t! 
neg packed16I" %} 14191 ins_cost(150); 14192 ins_encode %{ 14193 int vector_len = 2; 14194 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 14195 __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 14196 %} 14197 ins_pipe( pipe_slow ); 14198 %} 14199 14200 instruct vneg1D(regD dst) %{ 14201 predicate((UseSSE>=2) && (UseAVX == 0)); 14202 match(Set dst (NegVD dst)); 14203 ins_cost(150); 14204 format %{ "xorpd $dst,[0x8000000000000000] \t# $dst = -$dst neg packed1D" %} 14205 ins_encode %{ 14206 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 14207 %} 14208 ins_pipe(pipe_slow); 14209 %} 14210 14211 instruct vneg1D_reg(vecX dst, vecX src) %{ 14212 predicate(UseAVX > 0 && n->as_Vector()->length() == 1); 14213 match(Set dst (NegVD src)); 14214 format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed1D" %} 14215 ins_cost(150); 14216 ins_encode %{ 14217 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 14218 ExternalAddress(double_signflip())); 14219 %} 14220 ins_pipe( pipe_slow ); 14221 %} 14222 14223 instruct vneg2D_reg(vecX dst) %{ 14224 predicate((UseSSE>=2)); 14225 match(Set dst (NegVD dst)); 14226 ins_cost(150); 14227 format %{ "xorpd $dst,[0x8000000000000000]\t# $dst = -$dst neg packed2D" %} 14228 ins_encode %{ 14229 __ xorpd($dst$$XMMRegister, ExternalAddress(vector_double_signflip())); 14230 %} 14231 ins_pipe(pipe_slow); 14232 %} 14233 14234 14235 instruct vneg4D_reg(vecY dst, vecY src) %{ 14236 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14237 match(Set dst (NegVD src)); 14238 format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed4D" %} 14239 ins_cost(150); 14240 ins_encode %{ 14241 int vector_len = 1; 14242 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len); 14243 %} 14244 ins_pipe( pipe_slow ); 14245 %} 14246 14247 instruct vneg8D_reg(vecZ dst, vecZ src) %{ 14248 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 14249 match(Set dst (NegVD src)); 14250 format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed8D" %} 14251 ins_cost(150); 14252 ins_encode %{ 14253 int vector_len = 2; 14254 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len); 14255 %} 14256 ins_pipe( pipe_slow ); 14257 %} 14258 14259 instruct vneg2F_reg(vecD dst) %{ 14260 predicate(UseSSE > 0 && n->as_Vector()->length() == 2); 14261 match(Set dst (NegVF dst)); 14262 format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed2F" %} 14263 ins_cost(150); 14264 ins_encode %{ 14265 __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip())); 14266 %} 14267 ins_pipe( pipe_slow ); 14268 %} 14269 14270 instruct vneg4F_reg(vecX dst) %{ 14271 predicate(UseSSE > 0 && n->as_Vector()->length() == 4); 14272 match(Set dst (NegVF dst)); 14273 format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed4F" %} 14274 ins_cost(150); 14275 ins_encode %{ 14276 __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip())); 14277 %} 14278 ins_pipe( pipe_slow ); 14279 %} 14280 14281 instruct vneg8F_reg(vecY dst, vecY src) %{ 14282 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 14283 match(Set dst (NegVF src)); 14284 format %{ "vxorps $dst,$src\t# $dst = -$src neg packed8F" %} 14285 ins_cost(150); 14286 ins_encode %{ 14287 int vector_len = 1; 14288 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len); 14289 %} 14290 ins_pipe( pipe_slow ); 14291 %} 14292 14293 instruct vneg16F_reg(vecZ 

// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabs2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8I_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVI src));
  format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16I_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVI src));
  format %{ "evpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
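
// pabsd/vpabsd compute the per-lane absolute value directly (note that, as
// with scalar abs, |INT_MIN| wraps back to INT_MIN). A minimal sketch of the
// SSSE3 form used above (illustrative only):
//
//   #include <tmmintrin.h>
//   __m128i abs4I(__m128i src) {
//     return _mm_abs_epi32(src);  // pabsd
//   }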

instruct vabs1D_reg(vecD dst) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 1);
  match(Set dst (AbsVD dst));
  format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed1D" %}
  ins_cost(150);
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2D_reg(vecX dst) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVD dst));
  format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed2D" %}
  ins_cost(150);
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVD src));
  format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVD src));
  format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2F_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVF dst));
  format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed2F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}
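
// Floating-point abs clears the sign bit of each lane, so the rules above AND
// with a sign-mask constant. A minimal sketch of the double-precision case
// (illustrative only, assuming SSE2 headers):
//
//   #include <emmintrin.h>
//   __m128d abs2D(__m128d src) {
//     const __m128d signmask =
//         _mm_castsi128_pd(_mm_set1_epi64x(0x7FFFFFFFFFFFFFFFLL));
//     return _mm_and_pd(src, signmask);  // andpd clears the sign bits
//   }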

// --------------------------------- NOT --------------------------------------
instruct vnot4B(vecS dst, vecS src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "movdl $dst,[0xFFFFFFFF]\n\t"
            "pxor $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot4B_reg(vecS dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "movq $dst,[0xFFFFFFFF]\n\t"
            "pxor $dst,$src\t! not vectors (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B_reg(vecD dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "movdqu $dst,[0xFFFFFFFF]\n\t"
            "pxor $dst,$src\t! not vectors (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B_reg(vecX dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot32B_reg(vecY dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot64B_reg(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
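
// There is no vector NOT instruction on x86, so the rules above XOR with an
// all-bits-set constant (x ^ ~0 == ~x). A minimal sketch (illustrative only,
// assuming SSE2 headers):
//
//   #include <emmintrin.h>
//   __m128i not16B(__m128i src) {
//     __m128i all_ones = _mm_set1_epi32(-1);  // 0xFFFFFFFF in every lane
//     return _mm_xor_si128(src, all_ones);
//   }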

instruct vptest4inae(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setne $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 1;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setne $dst\t!" %}
  ins_encode %{
    int vector_len = 1;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
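
// vptest only sets flags: ZF = ((src1 & src2) == 0) and CF = ((~src1 & src2)
// == 0). The rules above materialize a 0/1 result from those flags with a
// setcc plus movzbl. A sketch of the two tests via the SSE4.1 intrinsics
// (illustrative only):
//
//   #include <smmintrin.h>
//   int any_true(__m128i src1, __m128i src2) {  // notZero: ZF == 0
//     return !_mm_testz_si128(src1, src2);
//   }
//   int all_true(__m128i src1, __m128i src2) {  // carrySet: CF == 1
//     return _mm_testc_si128(src1, src2);
//   }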

instruct loadmask8b(vecD dst, vecD src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (8B to 8B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16b(vecX dst, vecX src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (16B to 16B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32b(vecY dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask64b(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
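
// VectorLoadMask turns a boolean vector of 0/1 bytes into a lane mask of
// all-zeros/all-ones, again by subtracting from zero (0 - 1 == 0xFF).
// A minimal sketch of the byte case (illustrative only, assuming SSE2):
//
//   #include <emmintrin.h>
//   __m128i load_mask16B(__m128i src01) {  // lanes hold 0 or 1
//     __m128i zero = _mm_setzero_si128();  // pxor dst,dst
//     return _mm_sub_epi8(zero, src01);    // 0 - 1 == 0xFF, 0 - 0 == 0x00
//   }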

instruct loadmask4s(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (4B to 4S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8s(vecX dst, vecD src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (8B to 8S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16s(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (16B to 16S)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32s(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (32B to 32S)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 1);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2i(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (2B to 2I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4i(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (4B to 4I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8i(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbd $dst\t! load mask (8B to 8I)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16i(vecZ dst, vecX src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbd $tmp,$src\n\t"
            "vpsubd $dst,$tmp\t! load mask (16B to 16I)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbd($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask1l(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (1B to 1L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2l(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (2B to 2L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4l(vecY dst, vecS src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbq $dst\t! load mask (4B to 4L)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8l(vecZ dst, vecD src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbq $tmp,$src\n\t"
            "vpsubq $dst,$tmp\t! load mask (8B to 8L)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbq($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8b(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (8B to 8B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16b(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (16B to 16B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask32b(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
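
// VectorStoreMask is the inverse of VectorLoadMask: a lane mask of 0x00/0xFF
// bytes becomes a boolean vector of 0/1 bytes, which is just a per-lane
// absolute value (|-1| == 1). A minimal sketch of the byte case
// (illustrative only, assuming SSSE3):
//
//   #include <tmmintrin.h>
//   __m128i store_mask16B(__m128i mask) {  // lanes hold 0x00 or 0xFF
//     return _mm_abs_epi8(mask);           // pabsb
//   }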

instruct storemask64b(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqb k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpb(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask4s(vecS dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsw $dst,$src\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4S to 4B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8s(vecD dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsw $dst,$src\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (8S to 8B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16s(vecX dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpabsw $dst,$src\n\t"
            "vextracti128 $tmp,$dst\n\t"
            "vpackuswb $dst,$dst,$tmp\t! store mask (16S to 16B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask32s(vecY dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqw k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (32S to 32B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpw(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    // The dst is 256-bit - thus we can do a smaller move.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 1, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask2i(vecS dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (2I to 2B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask4i(vecS dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4I to 4B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8i(vecD dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubd $dst,$src\n\t"
            "vextracti128 $tmp,$dst\n\t"
            "vpackusdw $dst,$dst,$tmp\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (8I to 8B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16i(vecX dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (16I to 16B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpeqd(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
    // The dst is only 128-bit - thus we can do a smaller move.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 0, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
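
// For wider mask lanes the rules above first take the absolute value
// (-1 -> 1) and then narrow with unsigned-saturating packs until the values
// are bytes. A sketch of the 4I case (illustrative only; packusdw is SSE4.1,
// and only the low four result bytes are meaningful):
//
//   #include <smmintrin.h>
//   __m128i store_mask4I(__m128i mask) {  // int lanes hold 0 or -1
//     __m128i b = _mm_abs_epi32(mask);    // vpabsd: -1 -> 1
//     b = _mm_packus_epi32(b, b);         // vpackusdw: int -> short
//     return _mm_packus_epi16(b, b);      // vpackuswb: short -> byte
//   }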

instruct storemask1l(vecS dst, vecD src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (1L to 1B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask2l(vecS dst, vecX src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  format %{ "vpshufd $dst,$src,0x8\n\t"
            "vpabsd $dst,$dst\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (2L to 2B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8, vector_len);
    __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask4l(vecS dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch, TEMP dst);
  format %{ "vmovdqu $dst,[0,2,4,6,1,3,5,7]\n\t"
            "vpermd $dst,$dst,$src\n\t"
            "vpabsd $dst,$dst\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4L to 4B)" %}
  ins_encode %{
    // vpermd and load are 256-bit, but all others are 128-bit instructions.
    int vector_len = 0;
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_long_perm_mask()), $scratch$$Register);
    __ vpermd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister);
    __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8l(vecD dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (8L to 8B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpq(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    // The dst is only 128-bit - thus we can do a smaller move.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 0, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
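
// On AVX-512 the 512-bit store-mask rules compare into a k register and then
// use a masked broadcast move to materialize 0/1 bytes. A sketch of the 64B
// case via intrinsics (illustrative only; assumes AVX-512BW):
//
//   #include <immintrin.h>
//   __m512i store_mask64B(__m512i mask) {  // lanes hold 0x00 or 0xFF
//     __mmask64 k = _mm512_cmpeq_epi8_mask(mask, _mm512_set1_epi8(-1));
//     return _mm512_maskz_mov_epi8(k, _mm512_set1_epi8(1));  // 1 where set
//   }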

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
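
// FmaV maps to a single fused multiply-add: c = a * b + c with one rounding
// step, rather than a multiply followed by an add with two roundings.
// A minimal sketch of the packed-double case (illustrative only; assumes
// FMA3 support):
//
//   #include <immintrin.h>
//   __m128d fma2D(__m128d a, __m128d b, __m128d c) {
//     return _mm_fmadd_pd(a, b, c);  // vfmadd: single rounding
//   }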

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}