1 // 2 // Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // archtecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 512-bit registers or 8 words each, labeled (a)-p. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // For pre EVEX enabled architectures: 68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX) 69 // For EVEX enabled architectures: 70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX). 71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM31 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 632 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 633 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 634 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 635 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 636 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 637 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 638 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 639 #ifdef _LP64 640 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 641 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 642 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 643 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 644 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 645 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 646 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 647 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 648 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 649 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 650 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 651 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 652 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 653 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 654 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 655 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 656 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 657 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 658 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 659 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 660 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 661 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 662 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 663 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 664 #endif 665 ); 666 667 // flags allocation class should be last. 668 alloc_class chunk2(RFLAGS); 669 670 // Singleton class for condition codes 671 reg_class int_flags(RFLAGS); 672 673 // Class for pre evex float registers 674 reg_class float_reg_legacy(XMM0, 675 XMM1, 676 XMM2, 677 XMM3, 678 XMM4, 679 XMM5, 680 XMM6, 681 XMM7 682 #ifdef _LP64 683 ,XMM8, 684 XMM9, 685 XMM10, 686 XMM11, 687 XMM12, 688 XMM13, 689 XMM14, 690 XMM15 691 #endif 692 ); 693 694 // Class for evex float registers 695 reg_class float_reg_evex(XMM0, 696 XMM1, 697 XMM2, 698 XMM3, 699 XMM4, 700 XMM5, 701 XMM6, 702 XMM7 703 #ifdef _LP64 704 ,XMM8, 705 XMM9, 706 XMM10, 707 XMM11, 708 XMM12, 709 XMM13, 710 XMM14, 711 XMM15, 712 XMM16, 713 XMM17, 714 XMM18, 715 XMM19, 716 XMM20, 717 XMM21, 718 XMM22, 719 XMM23, 720 XMM24, 721 XMM25, 722 XMM26, 723 XMM27, 724 XMM28, 725 XMM29, 726 XMM30, 727 XMM31 728 #endif 729 ); 730 731 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 732 733 // Class for pre evex double registers 734 reg_class double_reg_legacy(XMM0, XMM0b, 735 XMM1, XMM1b, 736 XMM2, XMM2b, 737 XMM3, XMM3b, 738 XMM4, XMM4b, 739 XMM5, XMM5b, 740 XMM6, XMM6b, 741 XMM7, XMM7b 742 #ifdef _LP64 743 ,XMM8, XMM8b, 744 XMM9, XMM9b, 745 XMM10, XMM10b, 746 XMM11, XMM11b, 747 XMM12, XMM12b, 748 XMM13, XMM13b, 749 XMM14, XMM14b, 750 XMM15, XMM15b 751 #endif 752 ); 753 754 // Class for evex double registers 755 reg_class double_reg_evex(XMM0, XMM0b, 756 XMM1, XMM1b, 757 XMM2, XMM2b, 758 XMM3, XMM3b, 759 XMM4, XMM4b, 760 XMM5, XMM5b, 761 XMM6, XMM6b, 762 XMM7, XMM7b 763 #ifdef _LP64 764 ,XMM8, XMM8b, 765 XMM9, XMM9b, 766 XMM10, XMM10b, 767 XMM11, XMM11b, 768 XMM12, XMM12b, 769 XMM13, XMM13b, 770 XMM14, XMM14b, 771 XMM15, XMM15b, 772 XMM16, XMM16b, 773 XMM17, XMM17b, 774 XMM18, XMM18b, 775 XMM19, XMM19b, 776 XMM20, XMM20b, 777 XMM21, XMM21b, 778 XMM22, XMM22b, 779 XMM23, XMM23b, 780 XMM24, XMM24b, 781 XMM25, XMM25b, 782 XMM26, XMM26b, 783 XMM27, XMM27b, 784 XMM28, XMM28b, 785 XMM29, XMM29b, 786 XMM30, XMM30b, 787 XMM31, XMM31b 788 #endif 789 ); 790 791 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 792 793 // Class for pre evex 32bit vector registers 794 reg_class vectors_reg_legacy(XMM0, 795 XMM1, 796 XMM2, 797 XMM3, 798 XMM4, 799 XMM5, 800 XMM6, 801 XMM7 802 #ifdef _LP64 803 ,XMM8, 804 XMM9, 805 XMM10, 806 XMM11, 807 XMM12, 808 XMM13, 809 XMM14, 810 XMM15 811 #endif 812 ); 813 814 // Class for evex 32bit vector registers 815 reg_class vectors_reg_evex(XMM0, 816 XMM1, 817 XMM2, 818 XMM3, 819 XMM4, 820 XMM5, 821 XMM6, 822 XMM7 823 #ifdef _LP64 824 ,XMM8, 825 XMM9, 826 XMM10, 827 XMM11, 828 XMM12, 829 XMM13, 830 XMM14, 831 XMM15, 832 XMM16, 833 XMM17, 834 XMM18, 835 XMM19, 836 XMM20, 837 XMM21, 838 XMM22, 839 XMM23, 840 XMM24, 841 XMM25, 842 XMM26, 843 XMM27, 844 XMM28, 845 XMM29, 846 XMM30, 847 XMM31 848 #endif 849 ); 850 851 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 852 853 // Class for all 64bit vector registers 854 reg_class vectord_reg_legacy(XMM0, XMM0b, 855 XMM1, XMM1b, 856 XMM2, XMM2b, 857 XMM3, XMM3b, 858 XMM4, XMM4b, 859 XMM5, XMM5b, 860 XMM6, XMM6b, 861 XMM7, XMM7b 862 #ifdef _LP64 863 ,XMM8, XMM8b, 864 XMM9, XMM9b, 865 XMM10, XMM10b, 866 XMM11, XMM11b, 867 XMM12, XMM12b, 868 XMM13, XMM13b, 869 XMM14, XMM14b, 870 XMM15, XMM15b 871 #endif 872 ); 873 874 // Class for all 64bit vector registers 875 reg_class vectord_reg_evex(XMM0, XMM0b, 876 XMM1, XMM1b, 877 XMM2, XMM2b, 878 XMM3, XMM3b, 879 XMM4, XMM4b, 880 XMM5, XMM5b, 881 XMM6, XMM6b, 882 XMM7, XMM7b 883 #ifdef _LP64 884 ,XMM8, XMM8b, 885 XMM9, XMM9b, 886 XMM10, XMM10b, 887 XMM11, XMM11b, 888 XMM12, XMM12b, 889 XMM13, XMM13b, 890 XMM14, XMM14b, 891 XMM15, XMM15b, 892 XMM16, XMM16b, 893 XMM17, XMM17b, 894 XMM18, XMM18b, 895 XMM19, XMM19b, 896 XMM20, XMM20b, 897 XMM21, XMM21b, 898 XMM22, XMM22b, 899 XMM23, XMM23b, 900 XMM24, XMM24b, 901 XMM25, XMM25b, 902 XMM26, XMM26b, 903 XMM27, XMM27b, 904 XMM28, XMM28b, 905 XMM29, XMM29b, 906 XMM30, XMM30b, 907 XMM31, XMM31b 908 #endif 909 ); 910 911 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 912 913 // Class for all 128bit vector registers 914 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 915 XMM1, XMM1b, XMM1c, XMM1d, 916 XMM2, XMM2b, XMM2c, XMM2d, 917 XMM3, XMM3b, XMM3c, XMM3d, 918 XMM4, XMM4b, XMM4c, XMM4d, 919 XMM5, XMM5b, XMM5c, XMM5d, 920 XMM6, XMM6b, XMM6c, XMM6d, 921 XMM7, XMM7b, XMM7c, XMM7d 922 #ifdef _LP64 923 ,XMM8, XMM8b, XMM8c, XMM8d, 924 XMM9, XMM9b, XMM9c, XMM9d, 925 XMM10, XMM10b, XMM10c, XMM10d, 926 XMM11, XMM11b, XMM11c, XMM11d, 927 XMM12, XMM12b, XMM12c, XMM12d, 928 XMM13, XMM13b, XMM13c, XMM13d, 929 XMM14, XMM14b, XMM14c, XMM14d, 930 XMM15, XMM15b, XMM15c, XMM15d 931 #endif 932 ); 933 934 // Class for all 128bit vector registers 935 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 936 XMM1, XMM1b, XMM1c, XMM1d, 937 XMM2, XMM2b, XMM2c, XMM2d, 938 XMM3, XMM3b, XMM3c, XMM3d, 939 XMM4, XMM4b, XMM4c, XMM4d, 940 XMM5, XMM5b, XMM5c, XMM5d, 941 XMM6, XMM6b, XMM6c, XMM6d, 942 XMM7, XMM7b, XMM7c, XMM7d 943 #ifdef _LP64 944 ,XMM8, XMM8b, XMM8c, XMM8d, 945 XMM9, XMM9b, XMM9c, XMM9d, 946 XMM10, XMM10b, XMM10c, XMM10d, 947 XMM11, XMM11b, XMM11c, XMM11d, 948 XMM12, XMM12b, XMM12c, XMM12d, 949 XMM13, XMM13b, XMM13c, XMM13d, 950 XMM14, XMM14b, XMM14c, XMM14d, 951 XMM15, XMM15b, XMM15c, XMM15d, 952 XMM16, XMM16b, XMM16c, XMM16d, 953 XMM17, XMM17b, XMM17c, XMM17d, 954 XMM18, XMM18b, XMM18c, XMM18d, 955 XMM19, XMM19b, XMM19c, XMM19d, 956 XMM20, XMM20b, XMM20c, XMM20d, 957 XMM21, XMM21b, XMM21c, XMM21d, 958 XMM22, XMM22b, XMM22c, XMM22d, 959 XMM23, XMM23b, XMM23c, XMM23d, 960 XMM24, XMM24b, XMM24c, XMM24d, 961 XMM25, XMM25b, XMM25c, XMM25d, 962 XMM26, XMM26b, XMM26c, XMM26d, 963 XMM27, XMM27b, XMM27c, XMM27d, 964 XMM28, XMM28b, XMM28c, XMM28d, 965 XMM29, XMM29b, XMM29c, XMM29d, 966 XMM30, XMM30b, XMM30c, XMM30d, 967 XMM31, XMM31b, XMM31c, XMM31d 968 #endif 969 ); 970 971 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 972 973 // Class for all 256bit vector registers 974 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 975 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 976 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 977 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 978 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 979 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 980 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 981 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 982 #ifdef _LP64 983 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 984 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 985 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 986 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 987 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 988 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 989 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 990 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 991 #endif 992 ); 993 994 // Class for all 256bit vector registers 995 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 996 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 997 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 998 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 999 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1000 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1001 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1002 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1003 #ifdef _LP64 1004 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1005 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1006 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1007 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1008 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1009 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1010 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1011 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1012 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1013 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1014 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1015 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1016 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1017 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1018 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1019 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1020 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1021 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1022 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1023 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1024 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1025 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1026 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1027 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1028 #endif 1029 ); 1030 1031 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1032 1033 // Class for all 512bit vector registers 1034 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1035 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1036 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1037 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1038 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1039 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1040 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1041 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1042 #ifdef _LP64 1043 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1044 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1045 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1046 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1047 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1048 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1049 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1050 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1051 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1052 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1053 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1054 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1055 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1056 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1057 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1058 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1059 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1060 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1061 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1062 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1063 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1064 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1065 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1066 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1067 #endif 1068 ); 1069 1070 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1071 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1072 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1073 1074 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1075 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1076 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1077 1078 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1079 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1080 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1081 1082 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1083 reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1084 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1085 1086 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1087 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1088 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1089 1090 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1091 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1092 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1093 1094 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1095 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1096 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1097 1098 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1099 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1100 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1101 1102 #ifdef _LP64 1103 1104 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1105 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1106 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1107 1108 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1109 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1110 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1111 1112 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1113 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1114 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1115 1116 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1117 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1118 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1119 1120 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1121 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1122 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1123 1124 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1125 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1126 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1127 1128 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1129 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1130 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1131 1132 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1133 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1134 reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1135 1136 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1137 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1138 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1139 1140 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1141 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1142 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1143 1144 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1145 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1146 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1147 1148 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1149 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1150 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1151 1152 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1153 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1154 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1155 1156 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1157 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1158 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1159 1160 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1161 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1162 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1163 1164 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1165 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1166 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1167 1168 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1169 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1170 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1171 1172 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1173 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1174 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1175 1176 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1177 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1178 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1179 1180 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1181 reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h); 1182 reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p); 1183 1184 reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d); 1185 reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h); 1186 reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p); 1187 1188 reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d); 1189 reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h); 1190 reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p); 1191 1192 reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d); 1193 reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h); 1194 reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p); 1195 1196 reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d); 1197 reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); 1198 reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 1199 1200 #endif 1201 1202 %} 1203 1204 1205 //----------SOURCE BLOCK------------------------------------------------------- 1206 // This is a block of C++ code which provides values, functions, and 1207 // definitions necessary in the rest of the architecture description 1208 1209 source_hpp %{ 1210 // Header information of the source block. 1211 // Method declarations/definitions which are used outside 1212 // the ad-scope can conveniently be defined here. 1213 // 1214 // To keep related declarations/definitions/uses close together, 1215 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1216 1217 class NativeJump; 1218 1219 class CallStubImpl { 1220 1221 //-------------------------------------------------------------- 1222 //---< Used for optimization in Compile::shorten_branches >--- 1223 //-------------------------------------------------------------- 1224 1225 public: 1226 // Size of call trampoline stub. 1227 static uint size_call_trampoline() { 1228 return 0; // no call trampolines on this platform 1229 } 1230 1231 // number of relocations needed by a call trampoline stub 1232 static uint reloc_call_trampoline() { 1233 return 0; // no call trampolines on this platform 1234 } 1235 }; 1236 1237 class HandlerImpl { 1238 1239 public: 1240 1241 static int emit_exception_handler(CodeBuffer &cbuf); 1242 static int emit_deopt_handler(CodeBuffer& cbuf); 1243 1244 static uint size_exception_handler() { 1245 // NativeCall instruction size is the same as NativeJump. 1246 // exception handler starts out as jump and can be patched to 1247 // a call be deoptimization. (4932387) 1248 // Note that this value is also credited (in output.cpp) to 1249 // the size of the code section. 1250 return NativeJump::instruction_size; 1251 } 1252 1253 #ifdef _LP64 1254 static uint size_deopt_handler() { 1255 // three 5 byte instructions plus one move for unreachable address. 1256 return 15+3; 1257 } 1258 #else 1259 static uint size_deopt_handler() { 1260 // NativeCall instruction size is the same as NativeJump. 1261 // exception handler starts out as jump and can be patched to 1262 // a call be deoptimization. (4932387) 1263 // Note that this value is also credited (in output.cpp) to 1264 // the size of the code section. 1265 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1266 } 1267 #endif 1268 }; 1269 1270 %} // end source_hpp 1271 1272 source %{ 1273 1274 #include "opto/addnode.hpp" 1275 1276 // Emit exception handler code. 1277 // Stuff framesize into a register and call a VM stub routine. 1278 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1279 1280 // Note that the code buffer's insts_mark is always relative to insts. 1281 // That's why we must use the macroassembler to generate a handler. 1282 MacroAssembler _masm(&cbuf); 1283 address base = __ start_a_stub(size_exception_handler()); 1284 if (base == NULL) { 1285 ciEnv::current()->record_failure("CodeCache is full"); 1286 return 0; // CodeBuffer::expand failed 1287 } 1288 int offset = __ offset(); 1289 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1290 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1291 __ end_a_stub(); 1292 return offset; 1293 } 1294 1295 // Emit deopt handler code. 1296 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1297 1298 // Note that the code buffer's insts_mark is always relative to insts. 1299 // That's why we must use the macroassembler to generate a handler. 1300 MacroAssembler _masm(&cbuf); 1301 address base = __ start_a_stub(size_deopt_handler()); 1302 if (base == NULL) { 1303 ciEnv::current()->record_failure("CodeCache is full"); 1304 return 0; // CodeBuffer::expand failed 1305 } 1306 int offset = __ offset(); 1307 1308 #ifdef _LP64 1309 address the_pc = (address) __ pc(); 1310 Label next; 1311 // push a "the_pc" on the stack without destroying any registers 1312 // as they all may be live. 1313 1314 // push address of "next" 1315 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1316 __ bind(next); 1317 // adjust it so it matches "the_pc" 1318 __ subptr(Address(rsp, 0), __ offset() - offset); 1319 #else 1320 InternalAddress here(__ pc()); 1321 __ pushptr(here.addr()); 1322 #endif 1323 1324 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1325 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1326 __ end_a_stub(); 1327 return offset; 1328 } 1329 1330 1331 //============================================================================= 1332 1333 // Float masks come from different places depending on platform. 1334 #ifdef _LP64 1335 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1336 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1337 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1338 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1339 #else 1340 static address float_signmask() { return (address)float_signmask_pool; } 1341 static address float_signflip() { return (address)float_signflip_pool; } 1342 static address double_signmask() { return (address)double_signmask_pool; } 1343 static address double_signflip() { return (address)double_signflip_pool; } 1344 #endif 1345 1346 1347 const bool Matcher::match_rule_supported(int opcode) { 1348 if (!has_match_rule(opcode)) 1349 return false; 1350 1351 bool ret_value = true; 1352 switch (opcode) { 1353 case Op_PopCountI: 1354 case Op_PopCountL: 1355 if (!UsePopCountInstruction) 1356 ret_value = false; 1357 break; 1358 case Op_PopCountVI: 1359 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq()) 1360 ret_value = false; 1361 break; 1362 case Op_MulVI: 1363 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1364 ret_value = false; 1365 break; 1366 case Op_MulVL: 1367 case Op_MulReductionVL: 1368 if (VM_Version::supports_avx512dq() == false) 1369 ret_value = false; 1370 break; 1371 case Op_AddReductionVL: 1372 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1373 ret_value = false; 1374 break; 1375 case Op_AddReductionVI: 1376 if (UseSSE < 3) // requires at least SSE3 1377 ret_value = false; 1378 break; 1379 case Op_MulReductionVI: 1380 if (UseSSE < 4) // requires at least SSE4 1381 ret_value = false; 1382 break; 1383 case Op_AddReductionVF: 1384 case Op_AddReductionVD: 1385 case Op_MulReductionVF: 1386 case Op_MulReductionVD: 1387 if (UseSSE < 1) // requires at least SSE 1388 ret_value = false; 1389 break; 1390 case Op_SqrtVD: 1391 case Op_SqrtVF: 1392 if (UseAVX < 1) // enabled for AVX only 1393 ret_value = false; 1394 break; 1395 case Op_CompareAndSwapL: 1396 #ifdef _LP64 1397 case Op_CompareAndSwapP: 1398 #endif 1399 if (!VM_Version::supports_cx8()) 1400 ret_value = false; 1401 break; 1402 case Op_CMoveVF: 1403 case Op_CMoveVD: 1404 if (UseAVX < 1 || UseAVX > 2) 1405 ret_value = false; 1406 break; 1407 case Op_StrIndexOf: 1408 if (!UseSSE42Intrinsics) 1409 ret_value = false; 1410 break; 1411 case Op_StrIndexOfChar: 1412 if (!UseSSE42Intrinsics) 1413 ret_value = false; 1414 break; 1415 case Op_OnSpinWait: 1416 if (VM_Version::supports_on_spin_wait() == false) 1417 ret_value = false; 1418 break; 1419 } 1420 1421 return ret_value; // Per default match rules are supported. 1422 } 1423 1424 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1425 // identify extra cases that we might want to provide match rules for 1426 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen 1427 bool ret_value = match_rule_supported(opcode); 1428 if (ret_value) { 1429 switch (opcode) { 1430 case Op_AddVB: 1431 case Op_SubVB: 1432 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1433 ret_value = false; 1434 break; 1435 case Op_URShiftVS: 1436 case Op_RShiftVS: 1437 case Op_LShiftVS: 1438 case Op_MulVS: 1439 case Op_AddVS: 1440 case Op_SubVS: 1441 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1442 ret_value = false; 1443 break; 1444 case Op_CMoveVF: 1445 if (vlen != 8) 1446 ret_value = false; 1447 break; 1448 case Op_CMoveVD: 1449 if (vlen != 4) 1450 ret_value = false; 1451 break; 1452 } 1453 } 1454 1455 return ret_value; // Per default match rules are supported. 1456 } 1457 1458 const bool Matcher::has_predicated_vectors(void) { 1459 bool ret_value = false; 1460 if (UseAVX > 2) { 1461 ret_value = VM_Version::supports_avx512vl(); 1462 } 1463 1464 return ret_value; 1465 } 1466 1467 const int Matcher::float_pressure(int default_pressure_threshold) { 1468 int float_pressure_threshold = default_pressure_threshold; 1469 #ifdef _LP64 1470 if (UseAVX > 2) { 1471 // Increase pressure threshold on machines with AVX3 which have 1472 // 2x more XMM registers. 1473 float_pressure_threshold = default_pressure_threshold * 2; 1474 } 1475 #endif 1476 return float_pressure_threshold; 1477 } 1478 1479 // Max vector size in bytes. 0 if not supported. 1480 const int Matcher::vector_width_in_bytes(BasicType bt) { 1481 assert(is_java_primitive(bt), "only primitive type vectors"); 1482 if (UseSSE < 2) return 0; 1483 // SSE2 supports 128bit vectors for all types. 1484 // AVX2 supports 256bit vectors for all types. 1485 // AVX2/EVEX supports 512bit vectors for all types. 1486 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1487 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1488 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1489 size = (UseAVX > 2) ? 64 : 32; 1490 // Use flag to limit vector size. 1491 size = MIN2(size,(int)MaxVectorSize); 1492 // Minimum 2 values in vector (or 4 for bytes). 1493 switch (bt) { 1494 case T_DOUBLE: 1495 case T_LONG: 1496 if (size < 16) return 0; 1497 break; 1498 case T_FLOAT: 1499 case T_INT: 1500 if (size < 8) return 0; 1501 break; 1502 case T_BOOLEAN: 1503 if (size < 4) return 0; 1504 break; 1505 case T_CHAR: 1506 if (size < 4) return 0; 1507 break; 1508 case T_BYTE: 1509 if (size < 4) return 0; 1510 break; 1511 case T_SHORT: 1512 if (size < 4) return 0; 1513 break; 1514 default: 1515 ShouldNotReachHere(); 1516 } 1517 return size; 1518 } 1519 1520 // Limits on vector size (number of elements) loaded into vector. 1521 const int Matcher::max_vector_size(const BasicType bt) { 1522 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1523 } 1524 const int Matcher::min_vector_size(const BasicType bt) { 1525 int max_size = max_vector_size(bt); 1526 // Min size which can be loaded into vector is 4 bytes. 1527 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1528 return MIN2(size,max_size); 1529 } 1530 1531 // Vector ideal reg corresponding to specidied size in bytes 1532 const uint Matcher::vector_ideal_reg(int size) { 1533 assert(MaxVectorSize >= size, ""); 1534 switch(size) { 1535 case 4: return Op_VecS; 1536 case 8: return Op_VecD; 1537 case 16: return Op_VecX; 1538 case 32: return Op_VecY; 1539 case 64: return Op_VecZ; 1540 } 1541 ShouldNotReachHere(); 1542 return 0; 1543 } 1544 1545 // Only lowest bits of xmm reg are used for vector shift count. 1546 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1547 return Op_VecS; 1548 } 1549 1550 // x86 supports misaligned vectors store/load. 1551 const bool Matcher::misaligned_vectors_ok() { 1552 return !AlignVector; // can be changed by flag 1553 } 1554 1555 // x86 AES instructions are compatible with SunJCE expanded 1556 // keys, hence we do not need to pass the original key to stubs 1557 const bool Matcher::pass_original_key_for_aes() { 1558 return false; 1559 } 1560 1561 1562 const bool Matcher::convi2l_type_required = true; 1563 1564 // Check for shift by small constant as well 1565 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1566 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1567 shift->in(2)->get_int() <= 3 && 1568 // Are there other uses besides address expressions? 1569 !matcher->is_visited(shift)) { 1570 address_visited.set(shift->_idx); // Flag as address_visited 1571 mstack.push(shift->in(2), Matcher::Visit); 1572 Node *conv = shift->in(1); 1573 #ifdef _LP64 1574 // Allow Matcher to match the rule which bypass 1575 // ConvI2L operation for an array index on LP64 1576 // if the index value is positive. 1577 if (conv->Opcode() == Op_ConvI2L && 1578 conv->as_Type()->type()->is_long()->_lo >= 0 && 1579 // Are there other uses besides address expressions? 1580 !matcher->is_visited(conv)) { 1581 address_visited.set(conv->_idx); // Flag as address_visited 1582 mstack.push(conv->in(1), Matcher::Pre_Visit); 1583 } else 1584 #endif 1585 mstack.push(conv, Matcher::Pre_Visit); 1586 return true; 1587 } 1588 return false; 1589 } 1590 1591 // Should the Matcher clone shifts on addressing modes, expecting them 1592 // to be subsumed into complex addressing expressions or compute them 1593 // into registers? 1594 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1595 Node *off = m->in(AddPNode::Offset); 1596 if (off->is_Con()) { 1597 address_visited.test_set(m->_idx); // Flag as address_visited 1598 Node *adr = m->in(AddPNode::Address); 1599 1600 // Intel can handle 2 adds in addressing mode 1601 // AtomicAdd is not an addressing expression. 1602 // Cheap to find it by looking for screwy base. 1603 if (adr->is_AddP() && 1604 !adr->in(AddPNode::Base)->is_top() && 1605 // Are there other uses besides address expressions? 1606 !is_visited(adr)) { 1607 address_visited.set(adr->_idx); // Flag as address_visited 1608 Node *shift = adr->in(AddPNode::Offset); 1609 if (!clone_shift(shift, this, mstack, address_visited)) { 1610 mstack.push(shift, Pre_Visit); 1611 } 1612 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1613 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1614 } else { 1615 mstack.push(adr, Pre_Visit); 1616 } 1617 1618 // Clone X+offset as it also folds into most addressing expressions 1619 mstack.push(off, Visit); 1620 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1621 return true; 1622 } else if (clone_shift(off, this, mstack, address_visited)) { 1623 address_visited.test_set(m->_idx); // Flag as address_visited 1624 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1625 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1626 return true; 1627 } 1628 return false; 1629 } 1630 1631 void Compile::reshape_address(AddPNode* addp) { 1632 } 1633 1634 // Helper methods for MachSpillCopyNode::implementation(). 1635 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1636 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1637 // In 64-bit VM size calculation is very complex. Emitting instructions 1638 // into scratch buffer is used to get size in 64-bit VM. 1639 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1640 assert(ireg == Op_VecS || // 32bit vector 1641 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1642 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1643 "no non-adjacent vector moves" ); 1644 if (cbuf) { 1645 MacroAssembler _masm(cbuf); 1646 int offset = __ offset(); 1647 switch (ireg) { 1648 case Op_VecS: // copy whole register 1649 case Op_VecD: 1650 case Op_VecX: 1651 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1652 break; 1653 case Op_VecY: 1654 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1655 break; 1656 case Op_VecZ: 1657 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1658 break; 1659 default: 1660 ShouldNotReachHere(); 1661 } 1662 int size = __ offset() - offset; 1663 #ifdef ASSERT 1664 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1665 assert(!do_size || size == 4, "incorrect size calculattion"); 1666 #endif 1667 return size; 1668 #ifndef PRODUCT 1669 } else if (!do_size) { 1670 switch (ireg) { 1671 case Op_VecS: 1672 case Op_VecD: 1673 case Op_VecX: 1674 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1675 break; 1676 case Op_VecY: 1677 case Op_VecZ: 1678 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1679 break; 1680 default: 1681 ShouldNotReachHere(); 1682 } 1683 #endif 1684 } 1685 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1686 return (UseAVX > 2) ? 6 : 4; 1687 } 1688 1689 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1690 int stack_offset, int reg, uint ireg, outputStream* st) { 1691 // In 64-bit VM size calculation is very complex. Emitting instructions 1692 // into scratch buffer is used to get size in 64-bit VM. 1693 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1694 if (cbuf) { 1695 MacroAssembler _masm(cbuf); 1696 int offset = __ offset(); 1697 if (is_load) { 1698 switch (ireg) { 1699 case Op_VecS: 1700 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1701 break; 1702 case Op_VecD: 1703 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1704 break; 1705 case Op_VecX: 1706 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1707 break; 1708 case Op_VecY: 1709 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1710 break; 1711 case Op_VecZ: 1712 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1713 break; 1714 default: 1715 ShouldNotReachHere(); 1716 } 1717 } else { // store 1718 switch (ireg) { 1719 case Op_VecS: 1720 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1721 break; 1722 case Op_VecD: 1723 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1724 break; 1725 case Op_VecX: 1726 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1727 break; 1728 case Op_VecY: 1729 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1730 break; 1731 case Op_VecZ: 1732 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1733 break; 1734 default: 1735 ShouldNotReachHere(); 1736 } 1737 } 1738 int size = __ offset() - offset; 1739 #ifdef ASSERT 1740 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1741 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1742 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 1743 #endif 1744 return size; 1745 #ifndef PRODUCT 1746 } else if (!do_size) { 1747 if (is_load) { 1748 switch (ireg) { 1749 case Op_VecS: 1750 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1751 break; 1752 case Op_VecD: 1753 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1754 break; 1755 case Op_VecX: 1756 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1757 break; 1758 case Op_VecY: 1759 case Op_VecZ: 1760 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1761 break; 1762 default: 1763 ShouldNotReachHere(); 1764 } 1765 } else { // store 1766 switch (ireg) { 1767 case Op_VecS: 1768 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1769 break; 1770 case Op_VecD: 1771 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1772 break; 1773 case Op_VecX: 1774 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1775 break; 1776 case Op_VecY: 1777 case Op_VecZ: 1778 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1779 break; 1780 default: 1781 ShouldNotReachHere(); 1782 } 1783 } 1784 #endif 1785 } 1786 bool is_single_byte = false; 1787 int vec_len = 0; 1788 if ((UseAVX > 2) && (stack_offset != 0)) { 1789 int tuple_type = Assembler::EVEX_FVM; 1790 int input_size = Assembler::EVEX_32bit; 1791 switch (ireg) { 1792 case Op_VecS: 1793 tuple_type = Assembler::EVEX_T1S; 1794 break; 1795 case Op_VecD: 1796 tuple_type = Assembler::EVEX_T1S; 1797 input_size = Assembler::EVEX_64bit; 1798 break; 1799 case Op_VecX: 1800 break; 1801 case Op_VecY: 1802 vec_len = 1; 1803 break; 1804 case Op_VecZ: 1805 vec_len = 2; 1806 break; 1807 } 1808 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 1809 } 1810 int offset_size = 0; 1811 int size = 5; 1812 if (UseAVX > 2 ) { 1813 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 1814 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1815 size += 2; // Need an additional two bytes for EVEX encoding 1816 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 1817 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1818 } else { 1819 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1820 size += 2; // Need an additional two bytes for EVEX encodding 1821 } 1822 } else { 1823 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1824 } 1825 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1826 return size+offset_size; 1827 } 1828 1829 static inline jint replicate4_imm(int con, int width) { 1830 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1831 assert(width == 1 || width == 2, "only byte or short types here"); 1832 int bit_width = width * 8; 1833 jint val = con; 1834 val &= (1 << bit_width) - 1; // mask off sign bits 1835 while(bit_width < 32) { 1836 val |= (val << bit_width); 1837 bit_width <<= 1; 1838 } 1839 return val; 1840 } 1841 1842 static inline jlong replicate8_imm(int con, int width) { 1843 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 1844 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1845 int bit_width = width * 8; 1846 jlong val = con; 1847 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1848 while(bit_width < 64) { 1849 val |= (val << bit_width); 1850 bit_width <<= 1; 1851 } 1852 return val; 1853 } 1854 1855 #ifndef PRODUCT 1856 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1857 st->print("nop \t# %d bytes pad for loops and calls", _count); 1858 } 1859 #endif 1860 1861 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1862 MacroAssembler _masm(&cbuf); 1863 __ nop(_count); 1864 } 1865 1866 uint MachNopNode::size(PhaseRegAlloc*) const { 1867 return _count; 1868 } 1869 1870 #ifndef PRODUCT 1871 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1872 st->print("# breakpoint"); 1873 } 1874 #endif 1875 1876 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1877 MacroAssembler _masm(&cbuf); 1878 __ int3(); 1879 } 1880 1881 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1882 return MachNode::size(ra_); 1883 } 1884 1885 %} 1886 1887 encode %{ 1888 1889 enc_class call_epilog %{ 1890 if (VerifyStackAtCalls) { 1891 // Check that stack depth is unchanged: find majik cookie on stack 1892 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1893 MacroAssembler _masm(&cbuf); 1894 Label L; 1895 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1896 __ jccb(Assembler::equal, L); 1897 // Die if stack mismatch 1898 __ int3(); 1899 __ bind(L); 1900 } 1901 %} 1902 1903 %} 1904 1905 1906 //----------OPERANDS----------------------------------------------------------- 1907 // Operand definitions must precede instruction definitions for correct parsing 1908 // in the ADLC because operands constitute user defined types which are used in 1909 // instruction definitions. 1910 1911 // This one generically applies only for evex, so only one version 1912 operand vecZ() %{ 1913 constraint(ALLOC_IN_RC(vectorz_reg)); 1914 match(VecZ); 1915 1916 format %{ %} 1917 interface(REG_INTER); 1918 %} 1919 1920 // Comparison Code for FP conditional move 1921 operand cmpOp_vcmppd() %{ 1922 match(Bool); 1923 1924 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 1925 n->as_Bool()->_test._test != BoolTest::no_overflow); 1926 format %{ "" %} 1927 interface(COND_INTER) %{ 1928 equal (0x0, "eq"); 1929 less (0x1, "lt"); 1930 less_equal (0x2, "le"); 1931 not_equal (0xC, "ne"); 1932 greater_equal(0xD, "ge"); 1933 greater (0xE, "gt"); 1934 //TODO cannot compile (adlc breaks) without two next lines with error: 1935 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 1936 // equal' for overflow. 1937 overflow (0x20, "o"); // not really supported by the instruction 1938 no_overflow (0x21, "no"); // not really supported by the instruction 1939 %} 1940 %} 1941 1942 1943 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1944 1945 // ============================================================================ 1946 1947 instruct ShouldNotReachHere() %{ 1948 match(Halt); 1949 format %{ "ud2\t# ShouldNotReachHere" %} 1950 ins_encode %{ 1951 __ ud2(); 1952 %} 1953 ins_pipe(pipe_slow); 1954 %} 1955 1956 // =================================EVEX special=============================== 1957 1958 instruct setMask(rRegI dst, rRegI src) %{ 1959 predicate(Matcher::has_predicated_vectors()); 1960 match(Set dst (SetVectMaskI src)); 1961 effect(TEMP dst); 1962 format %{ "setvectmask $dst, $src" %} 1963 ins_encode %{ 1964 __ setvectmask($dst$$Register, $src$$Register); 1965 %} 1966 ins_pipe(pipe_slow); 1967 %} 1968 1969 // ============================================================================ 1970 1971 instruct addF_reg(regF dst, regF src) %{ 1972 predicate((UseSSE>=1) && (UseAVX == 0)); 1973 match(Set dst (AddF dst src)); 1974 1975 format %{ "addss $dst, $src" %} 1976 ins_cost(150); 1977 ins_encode %{ 1978 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1979 %} 1980 ins_pipe(pipe_slow); 1981 %} 1982 1983 instruct addF_mem(regF dst, memory src) %{ 1984 predicate((UseSSE>=1) && (UseAVX == 0)); 1985 match(Set dst (AddF dst (LoadF src))); 1986 1987 format %{ "addss $dst, $src" %} 1988 ins_cost(150); 1989 ins_encode %{ 1990 __ addss($dst$$XMMRegister, $src$$Address); 1991 %} 1992 ins_pipe(pipe_slow); 1993 %} 1994 1995 instruct addF_imm(regF dst, immF con) %{ 1996 predicate((UseSSE>=1) && (UseAVX == 0)); 1997 match(Set dst (AddF dst con)); 1998 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1999 ins_cost(150); 2000 ins_encode %{ 2001 __ addss($dst$$XMMRegister, $constantaddress($con)); 2002 %} 2003 ins_pipe(pipe_slow); 2004 %} 2005 2006 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2007 predicate(UseAVX > 0); 2008 match(Set dst (AddF src1 src2)); 2009 2010 format %{ "vaddss $dst, $src1, $src2" %} 2011 ins_cost(150); 2012 ins_encode %{ 2013 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2014 %} 2015 ins_pipe(pipe_slow); 2016 %} 2017 2018 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2019 predicate(UseAVX > 0); 2020 match(Set dst (AddF src1 (LoadF src2))); 2021 2022 format %{ "vaddss $dst, $src1, $src2" %} 2023 ins_cost(150); 2024 ins_encode %{ 2025 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2026 %} 2027 ins_pipe(pipe_slow); 2028 %} 2029 2030 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2031 predicate(UseAVX > 0); 2032 match(Set dst (AddF src con)); 2033 2034 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2035 ins_cost(150); 2036 ins_encode %{ 2037 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2038 %} 2039 ins_pipe(pipe_slow); 2040 %} 2041 2042 instruct addD_reg(regD dst, regD src) %{ 2043 predicate((UseSSE>=2) && (UseAVX == 0)); 2044 match(Set dst (AddD dst src)); 2045 2046 format %{ "addsd $dst, $src" %} 2047 ins_cost(150); 2048 ins_encode %{ 2049 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2050 %} 2051 ins_pipe(pipe_slow); 2052 %} 2053 2054 instruct addD_mem(regD dst, memory src) %{ 2055 predicate((UseSSE>=2) && (UseAVX == 0)); 2056 match(Set dst (AddD dst (LoadD src))); 2057 2058 format %{ "addsd $dst, $src" %} 2059 ins_cost(150); 2060 ins_encode %{ 2061 __ addsd($dst$$XMMRegister, $src$$Address); 2062 %} 2063 ins_pipe(pipe_slow); 2064 %} 2065 2066 instruct addD_imm(regD dst, immD con) %{ 2067 predicate((UseSSE>=2) && (UseAVX == 0)); 2068 match(Set dst (AddD dst con)); 2069 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2070 ins_cost(150); 2071 ins_encode %{ 2072 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2073 %} 2074 ins_pipe(pipe_slow); 2075 %} 2076 2077 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2078 predicate(UseAVX > 0); 2079 match(Set dst (AddD src1 src2)); 2080 2081 format %{ "vaddsd $dst, $src1, $src2" %} 2082 ins_cost(150); 2083 ins_encode %{ 2084 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2085 %} 2086 ins_pipe(pipe_slow); 2087 %} 2088 2089 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2090 predicate(UseAVX > 0); 2091 match(Set dst (AddD src1 (LoadD src2))); 2092 2093 format %{ "vaddsd $dst, $src1, $src2" %} 2094 ins_cost(150); 2095 ins_encode %{ 2096 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2097 %} 2098 ins_pipe(pipe_slow); 2099 %} 2100 2101 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2102 predicate(UseAVX > 0); 2103 match(Set dst (AddD src con)); 2104 2105 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2106 ins_cost(150); 2107 ins_encode %{ 2108 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2109 %} 2110 ins_pipe(pipe_slow); 2111 %} 2112 2113 instruct subF_reg(regF dst, regF src) %{ 2114 predicate((UseSSE>=1) && (UseAVX == 0)); 2115 match(Set dst (SubF dst src)); 2116 2117 format %{ "subss $dst, $src" %} 2118 ins_cost(150); 2119 ins_encode %{ 2120 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2121 %} 2122 ins_pipe(pipe_slow); 2123 %} 2124 2125 instruct subF_mem(regF dst, memory src) %{ 2126 predicate((UseSSE>=1) && (UseAVX == 0)); 2127 match(Set dst (SubF dst (LoadF src))); 2128 2129 format %{ "subss $dst, $src" %} 2130 ins_cost(150); 2131 ins_encode %{ 2132 __ subss($dst$$XMMRegister, $src$$Address); 2133 %} 2134 ins_pipe(pipe_slow); 2135 %} 2136 2137 instruct subF_imm(regF dst, immF con) %{ 2138 predicate((UseSSE>=1) && (UseAVX == 0)); 2139 match(Set dst (SubF dst con)); 2140 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2141 ins_cost(150); 2142 ins_encode %{ 2143 __ subss($dst$$XMMRegister, $constantaddress($con)); 2144 %} 2145 ins_pipe(pipe_slow); 2146 %} 2147 2148 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2149 predicate(UseAVX > 0); 2150 match(Set dst (SubF src1 src2)); 2151 2152 format %{ "vsubss $dst, $src1, $src2" %} 2153 ins_cost(150); 2154 ins_encode %{ 2155 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2156 %} 2157 ins_pipe(pipe_slow); 2158 %} 2159 2160 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2161 predicate(UseAVX > 0); 2162 match(Set dst (SubF src1 (LoadF src2))); 2163 2164 format %{ "vsubss $dst, $src1, $src2" %} 2165 ins_cost(150); 2166 ins_encode %{ 2167 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2168 %} 2169 ins_pipe(pipe_slow); 2170 %} 2171 2172 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2173 predicate(UseAVX > 0); 2174 match(Set dst (SubF src con)); 2175 2176 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2177 ins_cost(150); 2178 ins_encode %{ 2179 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2180 %} 2181 ins_pipe(pipe_slow); 2182 %} 2183 2184 instruct subD_reg(regD dst, regD src) %{ 2185 predicate((UseSSE>=2) && (UseAVX == 0)); 2186 match(Set dst (SubD dst src)); 2187 2188 format %{ "subsd $dst, $src" %} 2189 ins_cost(150); 2190 ins_encode %{ 2191 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2192 %} 2193 ins_pipe(pipe_slow); 2194 %} 2195 2196 instruct subD_mem(regD dst, memory src) %{ 2197 predicate((UseSSE>=2) && (UseAVX == 0)); 2198 match(Set dst (SubD dst (LoadD src))); 2199 2200 format %{ "subsd $dst, $src" %} 2201 ins_cost(150); 2202 ins_encode %{ 2203 __ subsd($dst$$XMMRegister, $src$$Address); 2204 %} 2205 ins_pipe(pipe_slow); 2206 %} 2207 2208 instruct subD_imm(regD dst, immD con) %{ 2209 predicate((UseSSE>=2) && (UseAVX == 0)); 2210 match(Set dst (SubD dst con)); 2211 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2212 ins_cost(150); 2213 ins_encode %{ 2214 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2215 %} 2216 ins_pipe(pipe_slow); 2217 %} 2218 2219 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2220 predicate(UseAVX > 0); 2221 match(Set dst (SubD src1 src2)); 2222 2223 format %{ "vsubsd $dst, $src1, $src2" %} 2224 ins_cost(150); 2225 ins_encode %{ 2226 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2227 %} 2228 ins_pipe(pipe_slow); 2229 %} 2230 2231 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2232 predicate(UseAVX > 0); 2233 match(Set dst (SubD src1 (LoadD src2))); 2234 2235 format %{ "vsubsd $dst, $src1, $src2" %} 2236 ins_cost(150); 2237 ins_encode %{ 2238 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2239 %} 2240 ins_pipe(pipe_slow); 2241 %} 2242 2243 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2244 predicate(UseAVX > 0); 2245 match(Set dst (SubD src con)); 2246 2247 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2248 ins_cost(150); 2249 ins_encode %{ 2250 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2251 %} 2252 ins_pipe(pipe_slow); 2253 %} 2254 2255 instruct mulF_reg(regF dst, regF src) %{ 2256 predicate((UseSSE>=1) && (UseAVX == 0)); 2257 match(Set dst (MulF dst src)); 2258 2259 format %{ "mulss $dst, $src" %} 2260 ins_cost(150); 2261 ins_encode %{ 2262 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2263 %} 2264 ins_pipe(pipe_slow); 2265 %} 2266 2267 instruct mulF_mem(regF dst, memory src) %{ 2268 predicate((UseSSE>=1) && (UseAVX == 0)); 2269 match(Set dst (MulF dst (LoadF src))); 2270 2271 format %{ "mulss $dst, $src" %} 2272 ins_cost(150); 2273 ins_encode %{ 2274 __ mulss($dst$$XMMRegister, $src$$Address); 2275 %} 2276 ins_pipe(pipe_slow); 2277 %} 2278 2279 instruct mulF_imm(regF dst, immF con) %{ 2280 predicate((UseSSE>=1) && (UseAVX == 0)); 2281 match(Set dst (MulF dst con)); 2282 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2283 ins_cost(150); 2284 ins_encode %{ 2285 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2286 %} 2287 ins_pipe(pipe_slow); 2288 %} 2289 2290 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2291 predicate(UseAVX > 0); 2292 match(Set dst (MulF src1 src2)); 2293 2294 format %{ "vmulss $dst, $src1, $src2" %} 2295 ins_cost(150); 2296 ins_encode %{ 2297 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2298 %} 2299 ins_pipe(pipe_slow); 2300 %} 2301 2302 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2303 predicate(UseAVX > 0); 2304 match(Set dst (MulF src1 (LoadF src2))); 2305 2306 format %{ "vmulss $dst, $src1, $src2" %} 2307 ins_cost(150); 2308 ins_encode %{ 2309 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2310 %} 2311 ins_pipe(pipe_slow); 2312 %} 2313 2314 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2315 predicate(UseAVX > 0); 2316 match(Set dst (MulF src con)); 2317 2318 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2319 ins_cost(150); 2320 ins_encode %{ 2321 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2322 %} 2323 ins_pipe(pipe_slow); 2324 %} 2325 2326 instruct mulD_reg(regD dst, regD src) %{ 2327 predicate((UseSSE>=2) && (UseAVX == 0)); 2328 match(Set dst (MulD dst src)); 2329 2330 format %{ "mulsd $dst, $src" %} 2331 ins_cost(150); 2332 ins_encode %{ 2333 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2334 %} 2335 ins_pipe(pipe_slow); 2336 %} 2337 2338 instruct mulD_mem(regD dst, memory src) %{ 2339 predicate((UseSSE>=2) && (UseAVX == 0)); 2340 match(Set dst (MulD dst (LoadD src))); 2341 2342 format %{ "mulsd $dst, $src" %} 2343 ins_cost(150); 2344 ins_encode %{ 2345 __ mulsd($dst$$XMMRegister, $src$$Address); 2346 %} 2347 ins_pipe(pipe_slow); 2348 %} 2349 2350 instruct mulD_imm(regD dst, immD con) %{ 2351 predicate((UseSSE>=2) && (UseAVX == 0)); 2352 match(Set dst (MulD dst con)); 2353 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2354 ins_cost(150); 2355 ins_encode %{ 2356 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2357 %} 2358 ins_pipe(pipe_slow); 2359 %} 2360 2361 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2362 predicate(UseAVX > 0); 2363 match(Set dst (MulD src1 src2)); 2364 2365 format %{ "vmulsd $dst, $src1, $src2" %} 2366 ins_cost(150); 2367 ins_encode %{ 2368 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2374 predicate(UseAVX > 0); 2375 match(Set dst (MulD src1 (LoadD src2))); 2376 2377 format %{ "vmulsd $dst, $src1, $src2" %} 2378 ins_cost(150); 2379 ins_encode %{ 2380 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2381 %} 2382 ins_pipe(pipe_slow); 2383 %} 2384 2385 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2386 predicate(UseAVX > 0); 2387 match(Set dst (MulD src con)); 2388 2389 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2390 ins_cost(150); 2391 ins_encode %{ 2392 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2393 %} 2394 ins_pipe(pipe_slow); 2395 %} 2396 2397 instruct divF_reg(regF dst, regF src) %{ 2398 predicate((UseSSE>=1) && (UseAVX == 0)); 2399 match(Set dst (DivF dst src)); 2400 2401 format %{ "divss $dst, $src" %} 2402 ins_cost(150); 2403 ins_encode %{ 2404 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2405 %} 2406 ins_pipe(pipe_slow); 2407 %} 2408 2409 instruct divF_mem(regF dst, memory src) %{ 2410 predicate((UseSSE>=1) && (UseAVX == 0)); 2411 match(Set dst (DivF dst (LoadF src))); 2412 2413 format %{ "divss $dst, $src" %} 2414 ins_cost(150); 2415 ins_encode %{ 2416 __ divss($dst$$XMMRegister, $src$$Address); 2417 %} 2418 ins_pipe(pipe_slow); 2419 %} 2420 2421 instruct divF_imm(regF dst, immF con) %{ 2422 predicate((UseSSE>=1) && (UseAVX == 0)); 2423 match(Set dst (DivF dst con)); 2424 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2425 ins_cost(150); 2426 ins_encode %{ 2427 __ divss($dst$$XMMRegister, $constantaddress($con)); 2428 %} 2429 ins_pipe(pipe_slow); 2430 %} 2431 2432 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2433 predicate(UseAVX > 0); 2434 match(Set dst (DivF src1 src2)); 2435 2436 format %{ "vdivss $dst, $src1, $src2" %} 2437 ins_cost(150); 2438 ins_encode %{ 2439 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2440 %} 2441 ins_pipe(pipe_slow); 2442 %} 2443 2444 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2445 predicate(UseAVX > 0); 2446 match(Set dst (DivF src1 (LoadF src2))); 2447 2448 format %{ "vdivss $dst, $src1, $src2" %} 2449 ins_cost(150); 2450 ins_encode %{ 2451 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2452 %} 2453 ins_pipe(pipe_slow); 2454 %} 2455 2456 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2457 predicate(UseAVX > 0); 2458 match(Set dst (DivF src con)); 2459 2460 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2461 ins_cost(150); 2462 ins_encode %{ 2463 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2464 %} 2465 ins_pipe(pipe_slow); 2466 %} 2467 2468 instruct divD_reg(regD dst, regD src) %{ 2469 predicate((UseSSE>=2) && (UseAVX == 0)); 2470 match(Set dst (DivD dst src)); 2471 2472 format %{ "divsd $dst, $src" %} 2473 ins_cost(150); 2474 ins_encode %{ 2475 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2476 %} 2477 ins_pipe(pipe_slow); 2478 %} 2479 2480 instruct divD_mem(regD dst, memory src) %{ 2481 predicate((UseSSE>=2) && (UseAVX == 0)); 2482 match(Set dst (DivD dst (LoadD src))); 2483 2484 format %{ "divsd $dst, $src" %} 2485 ins_cost(150); 2486 ins_encode %{ 2487 __ divsd($dst$$XMMRegister, $src$$Address); 2488 %} 2489 ins_pipe(pipe_slow); 2490 %} 2491 2492 instruct divD_imm(regD dst, immD con) %{ 2493 predicate((UseSSE>=2) && (UseAVX == 0)); 2494 match(Set dst (DivD dst con)); 2495 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2496 ins_cost(150); 2497 ins_encode %{ 2498 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2499 %} 2500 ins_pipe(pipe_slow); 2501 %} 2502 2503 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2504 predicate(UseAVX > 0); 2505 match(Set dst (DivD src1 src2)); 2506 2507 format %{ "vdivsd $dst, $src1, $src2" %} 2508 ins_cost(150); 2509 ins_encode %{ 2510 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2511 %} 2512 ins_pipe(pipe_slow); 2513 %} 2514 2515 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2516 predicate(UseAVX > 0); 2517 match(Set dst (DivD src1 (LoadD src2))); 2518 2519 format %{ "vdivsd $dst, $src1, $src2" %} 2520 ins_cost(150); 2521 ins_encode %{ 2522 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2523 %} 2524 ins_pipe(pipe_slow); 2525 %} 2526 2527 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2528 predicate(UseAVX > 0); 2529 match(Set dst (DivD src con)); 2530 2531 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2532 ins_cost(150); 2533 ins_encode %{ 2534 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2535 %} 2536 ins_pipe(pipe_slow); 2537 %} 2538 2539 instruct absF_reg(regF dst) %{ 2540 predicate((UseSSE>=1) && (UseAVX == 0)); 2541 match(Set dst (AbsF dst)); 2542 ins_cost(150); 2543 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2544 ins_encode %{ 2545 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2546 %} 2547 ins_pipe(pipe_slow); 2548 %} 2549 2550 instruct absF_reg_reg(regF dst, regF src) %{ 2551 predicate(VM_Version::supports_avxonly()); 2552 match(Set dst (AbsF src)); 2553 ins_cost(150); 2554 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2555 ins_encode %{ 2556 int vector_len = 0; 2557 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2558 ExternalAddress(float_signmask()), vector_len); 2559 %} 2560 ins_pipe(pipe_slow); 2561 %} 2562 2563 #ifdef _LP64 2564 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2565 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2566 match(Set dst (AbsF src)); 2567 ins_cost(150); 2568 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2569 ins_encode %{ 2570 int vector_len = 0; 2571 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2572 ExternalAddress(float_signmask()), vector_len); 2573 %} 2574 ins_pipe(pipe_slow); 2575 %} 2576 2577 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ 2578 predicate(VM_Version::supports_avx512novl()); 2579 match(Set dst (AbsF src1)); 2580 effect(TEMP src2); 2581 ins_cost(150); 2582 format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} 2583 ins_encode %{ 2584 int vector_len = 0; 2585 __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2586 ExternalAddress(float_signmask()), vector_len); 2587 %} 2588 ins_pipe(pipe_slow); 2589 %} 2590 #else // _LP64 2591 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2592 predicate(UseAVX > 2); 2593 match(Set dst (AbsF src)); 2594 ins_cost(150); 2595 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2596 ins_encode %{ 2597 int vector_len = 0; 2598 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2599 ExternalAddress(float_signmask()), vector_len); 2600 %} 2601 ins_pipe(pipe_slow); 2602 %} 2603 #endif 2604 2605 instruct absD_reg(regD dst) %{ 2606 predicate((UseSSE>=2) && (UseAVX == 0)); 2607 match(Set dst (AbsD dst)); 2608 ins_cost(150); 2609 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2610 "# abs double by sign masking" %} 2611 ins_encode %{ 2612 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2613 %} 2614 ins_pipe(pipe_slow); 2615 %} 2616 2617 instruct absD_reg_reg(regD dst, regD src) %{ 2618 predicate(VM_Version::supports_avxonly()); 2619 match(Set dst (AbsD src)); 2620 ins_cost(150); 2621 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2622 "# abs double by sign masking" %} 2623 ins_encode %{ 2624 int vector_len = 0; 2625 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2626 ExternalAddress(double_signmask()), vector_len); 2627 %} 2628 ins_pipe(pipe_slow); 2629 %} 2630 2631 #ifdef _LP64 2632 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2633 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2634 match(Set dst (AbsD src)); 2635 ins_cost(150); 2636 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2637 "# abs double by sign masking" %} 2638 ins_encode %{ 2639 int vector_len = 0; 2640 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2641 ExternalAddress(double_signmask()), vector_len); 2642 %} 2643 ins_pipe(pipe_slow); 2644 %} 2645 2646 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ 2647 predicate(VM_Version::supports_avx512novl()); 2648 match(Set dst (AbsD src1)); 2649 effect(TEMP src2); 2650 ins_cost(150); 2651 format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %} 2652 ins_encode %{ 2653 int vector_len = 0; 2654 __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2655 ExternalAddress(double_signmask()), vector_len); 2656 %} 2657 ins_pipe(pipe_slow); 2658 %} 2659 #else // _LP64 2660 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2661 predicate(UseAVX > 2); 2662 match(Set dst (AbsD src)); 2663 ins_cost(150); 2664 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2665 "# abs double by sign masking" %} 2666 ins_encode %{ 2667 int vector_len = 0; 2668 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2669 ExternalAddress(double_signmask()), vector_len); 2670 %} 2671 ins_pipe(pipe_slow); 2672 %} 2673 #endif 2674 2675 instruct negF_reg(regF dst) %{ 2676 predicate((UseSSE>=1) && (UseAVX == 0)); 2677 match(Set dst (NegF dst)); 2678 ins_cost(150); 2679 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2680 ins_encode %{ 2681 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2682 %} 2683 ins_pipe(pipe_slow); 2684 %} 2685 2686 instruct negF_reg_reg(regF dst, regF src) %{ 2687 predicate(UseAVX > 0); 2688 match(Set dst (NegF src)); 2689 ins_cost(150); 2690 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2691 ins_encode %{ 2692 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2693 ExternalAddress(float_signflip())); 2694 %} 2695 ins_pipe(pipe_slow); 2696 %} 2697 2698 instruct negD_reg(regD dst) %{ 2699 predicate((UseSSE>=2) && (UseAVX == 0)); 2700 match(Set dst (NegD dst)); 2701 ins_cost(150); 2702 format %{ "xorpd $dst, [0x8000000000000000]\t" 2703 "# neg double by sign flipping" %} 2704 ins_encode %{ 2705 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2706 %} 2707 ins_pipe(pipe_slow); 2708 %} 2709 2710 instruct negD_reg_reg(regD dst, regD src) %{ 2711 predicate(UseAVX > 0); 2712 match(Set dst (NegD src)); 2713 ins_cost(150); 2714 format %{ "vnegatess $dst, $src, [0x8000000000000000]\t" 2715 "# neg double by sign flipping" %} 2716 ins_encode %{ 2717 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2718 ExternalAddress(double_signflip())); 2719 %} 2720 ins_pipe(pipe_slow); 2721 %} 2722 2723 instruct sqrtF_reg(regF dst, regF src) %{ 2724 predicate(UseSSE>=1); 2725 match(Set dst (SqrtF src)); 2726 2727 format %{ "sqrtss $dst, $src" %} 2728 ins_cost(150); 2729 ins_encode %{ 2730 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2731 %} 2732 ins_pipe(pipe_slow); 2733 %} 2734 2735 instruct sqrtF_mem(regF dst, memory src) %{ 2736 predicate(UseSSE>=1); 2737 match(Set dst (SqrtF (LoadF src))); 2738 2739 format %{ "sqrtss $dst, $src" %} 2740 ins_cost(150); 2741 ins_encode %{ 2742 __ sqrtss($dst$$XMMRegister, $src$$Address); 2743 %} 2744 ins_pipe(pipe_slow); 2745 %} 2746 2747 instruct sqrtF_imm(regF dst, immF con) %{ 2748 predicate(UseSSE>=1); 2749 match(Set dst (SqrtF con)); 2750 2751 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2752 ins_cost(150); 2753 ins_encode %{ 2754 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2755 %} 2756 ins_pipe(pipe_slow); 2757 %} 2758 2759 instruct sqrtD_reg(regD dst, regD src) %{ 2760 predicate(UseSSE>=2); 2761 match(Set dst (SqrtD src)); 2762 2763 format %{ "sqrtsd $dst, $src" %} 2764 ins_cost(150); 2765 ins_encode %{ 2766 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2767 %} 2768 ins_pipe(pipe_slow); 2769 %} 2770 2771 instruct sqrtD_mem(regD dst, memory src) %{ 2772 predicate(UseSSE>=2); 2773 match(Set dst (SqrtD (LoadD src))); 2774 2775 format %{ "sqrtsd $dst, $src" %} 2776 ins_cost(150); 2777 ins_encode %{ 2778 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2779 %} 2780 ins_pipe(pipe_slow); 2781 %} 2782 2783 instruct sqrtD_imm(regD dst, immD con) %{ 2784 predicate(UseSSE>=2); 2785 match(Set dst (SqrtD con)); 2786 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2787 ins_cost(150); 2788 ins_encode %{ 2789 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2790 %} 2791 ins_pipe(pipe_slow); 2792 %} 2793 2794 instruct onspinwait() %{ 2795 match(OnSpinWait); 2796 ins_cost(200); 2797 2798 format %{ 2799 $$template 2800 if (os::is_MP()) { 2801 $$emit$$"pause\t! membar_onspinwait" 2802 } else { 2803 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2804 } 2805 %} 2806 ins_encode %{ 2807 __ pause(); 2808 %} 2809 ins_pipe(pipe_slow); 2810 %} 2811 2812 // a * b + c 2813 instruct fmaD_reg(regD a, regD b, regD c) %{ 2814 predicate(UseFMA); 2815 match(Set c (FmaD c (Binary a b))); 2816 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2817 ins_cost(150); 2818 ins_encode %{ 2819 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2820 %} 2821 ins_pipe( pipe_slow ); 2822 %} 2823 2824 // a * b + c 2825 instruct fmaF_reg(regF a, regF b, regF c) %{ 2826 predicate(UseFMA); 2827 match(Set c (FmaF c (Binary a b))); 2828 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2829 ins_cost(150); 2830 ins_encode %{ 2831 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2832 %} 2833 ins_pipe( pipe_slow ); 2834 %} 2835 2836 // ====================VECTOR INSTRUCTIONS===================================== 2837 2838 // Load vectors (4 bytes long) 2839 instruct loadV4(vecS dst, memory mem) %{ 2840 predicate(n->as_LoadVector()->memory_size() == 4); 2841 match(Set dst (LoadVector mem)); 2842 ins_cost(125); 2843 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2844 ins_encode %{ 2845 __ movdl($dst$$XMMRegister, $mem$$Address); 2846 %} 2847 ins_pipe( pipe_slow ); 2848 %} 2849 2850 // Load vectors (8 bytes long) 2851 instruct loadV8(vecD dst, memory mem) %{ 2852 predicate(n->as_LoadVector()->memory_size() == 8); 2853 match(Set dst (LoadVector mem)); 2854 ins_cost(125); 2855 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2856 ins_encode %{ 2857 __ movq($dst$$XMMRegister, $mem$$Address); 2858 %} 2859 ins_pipe( pipe_slow ); 2860 %} 2861 2862 // Load vectors (16 bytes long) 2863 instruct loadV16(vecX dst, memory mem) %{ 2864 predicate(n->as_LoadVector()->memory_size() == 16); 2865 match(Set dst (LoadVector mem)); 2866 ins_cost(125); 2867 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2868 ins_encode %{ 2869 __ movdqu($dst$$XMMRegister, $mem$$Address); 2870 %} 2871 ins_pipe( pipe_slow ); 2872 %} 2873 2874 // Load vectors (32 bytes long) 2875 instruct loadV32(vecY dst, memory mem) %{ 2876 predicate(n->as_LoadVector()->memory_size() == 32); 2877 match(Set dst (LoadVector mem)); 2878 ins_cost(125); 2879 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2880 ins_encode %{ 2881 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2882 %} 2883 ins_pipe( pipe_slow ); 2884 %} 2885 2886 // Load vectors (64 bytes long) 2887 instruct loadV64_dword(vecZ dst, memory mem) %{ 2888 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 2889 match(Set dst (LoadVector mem)); 2890 ins_cost(125); 2891 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 2892 ins_encode %{ 2893 int vector_len = 2; 2894 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 2895 %} 2896 ins_pipe( pipe_slow ); 2897 %} 2898 2899 // Load vectors (64 bytes long) 2900 instruct loadV64_qword(vecZ dst, memory mem) %{ 2901 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 2902 match(Set dst (LoadVector mem)); 2903 ins_cost(125); 2904 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 2905 ins_encode %{ 2906 int vector_len = 2; 2907 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 2908 %} 2909 ins_pipe( pipe_slow ); 2910 %} 2911 2912 // Store vectors 2913 instruct storeV4(memory mem, vecS src) %{ 2914 predicate(n->as_StoreVector()->memory_size() == 4); 2915 match(Set mem (StoreVector mem src)); 2916 ins_cost(145); 2917 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2918 ins_encode %{ 2919 __ movdl($mem$$Address, $src$$XMMRegister); 2920 %} 2921 ins_pipe( pipe_slow ); 2922 %} 2923 2924 instruct storeV8(memory mem, vecD src) %{ 2925 predicate(n->as_StoreVector()->memory_size() == 8); 2926 match(Set mem (StoreVector mem src)); 2927 ins_cost(145); 2928 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 2929 ins_encode %{ 2930 __ movq($mem$$Address, $src$$XMMRegister); 2931 %} 2932 ins_pipe( pipe_slow ); 2933 %} 2934 2935 instruct storeV16(memory mem, vecX src) %{ 2936 predicate(n->as_StoreVector()->memory_size() == 16); 2937 match(Set mem (StoreVector mem src)); 2938 ins_cost(145); 2939 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 2940 ins_encode %{ 2941 __ movdqu($mem$$Address, $src$$XMMRegister); 2942 %} 2943 ins_pipe( pipe_slow ); 2944 %} 2945 2946 instruct storeV32(memory mem, vecY src) %{ 2947 predicate(n->as_StoreVector()->memory_size() == 32); 2948 match(Set mem (StoreVector mem src)); 2949 ins_cost(145); 2950 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2951 ins_encode %{ 2952 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2953 %} 2954 ins_pipe( pipe_slow ); 2955 %} 2956 2957 instruct storeV64_dword(memory mem, vecZ src) %{ 2958 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 2959 match(Set mem (StoreVector mem src)); 2960 ins_cost(145); 2961 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 2962 ins_encode %{ 2963 int vector_len = 2; 2964 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 2965 %} 2966 ins_pipe( pipe_slow ); 2967 %} 2968 2969 instruct storeV64_qword(memory mem, vecZ src) %{ 2970 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 2971 match(Set mem (StoreVector mem src)); 2972 ins_cost(145); 2973 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 2974 ins_encode %{ 2975 int vector_len = 2; 2976 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 2977 %} 2978 ins_pipe( pipe_slow ); 2979 %} 2980 2981 // ====================LEGACY REPLICATE======================================= 2982 2983 instruct Repl4B_mem(vecS dst, memory mem) %{ 2984 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2985 match(Set dst (ReplicateB (LoadB mem))); 2986 format %{ "punpcklbw $dst,$mem\n\t" 2987 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 2988 ins_encode %{ 2989 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2990 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2991 %} 2992 ins_pipe( pipe_slow ); 2993 %} 2994 2995 instruct Repl8B_mem(vecD dst, memory mem) %{ 2996 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2997 match(Set dst (ReplicateB (LoadB mem))); 2998 format %{ "punpcklbw $dst,$mem\n\t" 2999 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3000 ins_encode %{ 3001 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3002 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3003 %} 3004 ins_pipe( pipe_slow ); 3005 %} 3006 3007 instruct Repl16B(vecX dst, rRegI src) %{ 3008 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3009 match(Set dst (ReplicateB src)); 3010 format %{ "movd $dst,$src\n\t" 3011 "punpcklbw $dst,$dst\n\t" 3012 "pshuflw $dst,$dst,0x00\n\t" 3013 "punpcklqdq $dst,$dst\t! replicate16B" %} 3014 ins_encode %{ 3015 __ movdl($dst$$XMMRegister, $src$$Register); 3016 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3017 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3018 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3019 %} 3020 ins_pipe( pipe_slow ); 3021 %} 3022 3023 instruct Repl16B_mem(vecX dst, memory mem) %{ 3024 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3025 match(Set dst (ReplicateB (LoadB mem))); 3026 format %{ "punpcklbw $dst,$mem\n\t" 3027 "pshuflw $dst,$dst,0x00\n\t" 3028 "punpcklqdq $dst,$dst\t! replicate16B" %} 3029 ins_encode %{ 3030 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3031 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3032 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3033 %} 3034 ins_pipe( pipe_slow ); 3035 %} 3036 3037 instruct Repl32B(vecY dst, rRegI src) %{ 3038 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3039 match(Set dst (ReplicateB src)); 3040 format %{ "movd $dst,$src\n\t" 3041 "punpcklbw $dst,$dst\n\t" 3042 "pshuflw $dst,$dst,0x00\n\t" 3043 "punpcklqdq $dst,$dst\n\t" 3044 "vinserti128_high $dst,$dst\t! replicate32B" %} 3045 ins_encode %{ 3046 __ movdl($dst$$XMMRegister, $src$$Register); 3047 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3048 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3049 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3050 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3051 %} 3052 ins_pipe( pipe_slow ); 3053 %} 3054 3055 instruct Repl32B_mem(vecY dst, memory mem) %{ 3056 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3057 match(Set dst (ReplicateB (LoadB mem))); 3058 format %{ "punpcklbw $dst,$mem\n\t" 3059 "pshuflw $dst,$dst,0x00\n\t" 3060 "punpcklqdq $dst,$dst\n\t" 3061 "vinserti128_high $dst,$dst\t! replicate32B" %} 3062 ins_encode %{ 3063 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3064 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3065 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3066 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3067 %} 3068 ins_pipe( pipe_slow ); 3069 %} 3070 3071 instruct Repl16B_imm(vecX dst, immI con) %{ 3072 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3073 match(Set dst (ReplicateB con)); 3074 format %{ "movq $dst,[$constantaddress]\n\t" 3075 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3076 ins_encode %{ 3077 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3078 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3079 %} 3080 ins_pipe( pipe_slow ); 3081 %} 3082 3083 instruct Repl32B_imm(vecY dst, immI con) %{ 3084 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3085 match(Set dst (ReplicateB con)); 3086 format %{ "movq $dst,[$constantaddress]\n\t" 3087 "punpcklqdq $dst,$dst\n\t" 3088 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3089 ins_encode %{ 3090 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3091 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3092 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3093 %} 3094 ins_pipe( pipe_slow ); 3095 %} 3096 3097 instruct Repl4S(vecD dst, rRegI src) %{ 3098 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3099 match(Set dst (ReplicateS src)); 3100 format %{ "movd $dst,$src\n\t" 3101 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3102 ins_encode %{ 3103 __ movdl($dst$$XMMRegister, $src$$Register); 3104 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3105 %} 3106 ins_pipe( pipe_slow ); 3107 %} 3108 3109 instruct Repl4S_mem(vecD dst, memory mem) %{ 3110 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3111 match(Set dst (ReplicateS (LoadS mem))); 3112 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3113 ins_encode %{ 3114 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3115 %} 3116 ins_pipe( pipe_slow ); 3117 %} 3118 3119 instruct Repl8S(vecX dst, rRegI src) %{ 3120 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3121 match(Set dst (ReplicateS src)); 3122 format %{ "movd $dst,$src\n\t" 3123 "pshuflw $dst,$dst,0x00\n\t" 3124 "punpcklqdq $dst,$dst\t! replicate8S" %} 3125 ins_encode %{ 3126 __ movdl($dst$$XMMRegister, $src$$Register); 3127 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3128 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3129 %} 3130 ins_pipe( pipe_slow ); 3131 %} 3132 3133 instruct Repl8S_mem(vecX dst, memory mem) %{ 3134 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3135 match(Set dst (ReplicateS (LoadS mem))); 3136 format %{ "pshuflw $dst,$mem,0x00\n\t" 3137 "punpcklqdq $dst,$dst\t! replicate8S" %} 3138 ins_encode %{ 3139 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3140 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3141 %} 3142 ins_pipe( pipe_slow ); 3143 %} 3144 3145 instruct Repl8S_imm(vecX dst, immI con) %{ 3146 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3147 match(Set dst (ReplicateS con)); 3148 format %{ "movq $dst,[$constantaddress]\n\t" 3149 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3150 ins_encode %{ 3151 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3152 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3153 %} 3154 ins_pipe( pipe_slow ); 3155 %} 3156 3157 instruct Repl16S(vecY dst, rRegI src) %{ 3158 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3159 match(Set dst (ReplicateS src)); 3160 format %{ "movd $dst,$src\n\t" 3161 "pshuflw $dst,$dst,0x00\n\t" 3162 "punpcklqdq $dst,$dst\n\t" 3163 "vinserti128_high $dst,$dst\t! replicate16S" %} 3164 ins_encode %{ 3165 __ movdl($dst$$XMMRegister, $src$$Register); 3166 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3167 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3168 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3169 %} 3170 ins_pipe( pipe_slow ); 3171 %} 3172 3173 instruct Repl16S_mem(vecY dst, memory mem) %{ 3174 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3175 match(Set dst (ReplicateS (LoadS mem))); 3176 format %{ "pshuflw $dst,$mem,0x00\n\t" 3177 "punpcklqdq $dst,$dst\n\t" 3178 "vinserti128_high $dst,$dst\t! replicate16S" %} 3179 ins_encode %{ 3180 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3181 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3182 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3183 %} 3184 ins_pipe( pipe_slow ); 3185 %} 3186 3187 instruct Repl16S_imm(vecY dst, immI con) %{ 3188 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3189 match(Set dst (ReplicateS con)); 3190 format %{ "movq $dst,[$constantaddress]\n\t" 3191 "punpcklqdq $dst,$dst\n\t" 3192 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3193 ins_encode %{ 3194 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3195 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3196 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3197 %} 3198 ins_pipe( pipe_slow ); 3199 %} 3200 3201 instruct Repl4I(vecX dst, rRegI src) %{ 3202 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3203 match(Set dst (ReplicateI src)); 3204 format %{ "movd $dst,$src\n\t" 3205 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3206 ins_encode %{ 3207 __ movdl($dst$$XMMRegister, $src$$Register); 3208 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3209 %} 3210 ins_pipe( pipe_slow ); 3211 %} 3212 3213 instruct Repl4I_mem(vecX dst, memory mem) %{ 3214 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3215 match(Set dst (ReplicateI (LoadI mem))); 3216 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3217 ins_encode %{ 3218 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3219 %} 3220 ins_pipe( pipe_slow ); 3221 %} 3222 3223 instruct Repl8I(vecY dst, rRegI src) %{ 3224 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3225 match(Set dst (ReplicateI src)); 3226 format %{ "movd $dst,$src\n\t" 3227 "pshufd $dst,$dst,0x00\n\t" 3228 "vinserti128_high $dst,$dst\t! replicate8I" %} 3229 ins_encode %{ 3230 __ movdl($dst$$XMMRegister, $src$$Register); 3231 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3232 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3233 %} 3234 ins_pipe( pipe_slow ); 3235 %} 3236 3237 instruct Repl8I_mem(vecY dst, memory mem) %{ 3238 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3239 match(Set dst (ReplicateI (LoadI mem))); 3240 format %{ "pshufd $dst,$mem,0x00\n\t" 3241 "vinserti128_high $dst,$dst\t! replicate8I" %} 3242 ins_encode %{ 3243 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3244 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3245 %} 3246 ins_pipe( pipe_slow ); 3247 %} 3248 3249 instruct Repl4I_imm(vecX dst, immI con) %{ 3250 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3251 match(Set dst (ReplicateI con)); 3252 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3253 "punpcklqdq $dst,$dst" %} 3254 ins_encode %{ 3255 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3256 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3257 %} 3258 ins_pipe( pipe_slow ); 3259 %} 3260 3261 instruct Repl8I_imm(vecY dst, immI con) %{ 3262 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3263 match(Set dst (ReplicateI con)); 3264 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3265 "punpcklqdq $dst,$dst\n\t" 3266 "vinserti128_high $dst,$dst" %} 3267 ins_encode %{ 3268 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3269 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3270 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3271 %} 3272 ins_pipe( pipe_slow ); 3273 %} 3274 3275 // Long could be loaded into xmm register directly from memory. 3276 instruct Repl2L_mem(vecX dst, memory mem) %{ 3277 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3278 match(Set dst (ReplicateL (LoadL mem))); 3279 format %{ "movq $dst,$mem\n\t" 3280 "punpcklqdq $dst,$dst\t! replicate2L" %} 3281 ins_encode %{ 3282 __ movq($dst$$XMMRegister, $mem$$Address); 3283 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3284 %} 3285 ins_pipe( pipe_slow ); 3286 %} 3287 3288 // Replicate long (8 byte) scalar to be vector 3289 #ifdef _LP64 3290 instruct Repl4L(vecY dst, rRegL src) %{ 3291 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3292 match(Set dst (ReplicateL src)); 3293 format %{ "movdq $dst,$src\n\t" 3294 "punpcklqdq $dst,$dst\n\t" 3295 "vinserti128_high $dst,$dst\t! replicate4L" %} 3296 ins_encode %{ 3297 __ movdq($dst$$XMMRegister, $src$$Register); 3298 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3299 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3300 %} 3301 ins_pipe( pipe_slow ); 3302 %} 3303 #else // _LP64 3304 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3305 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3306 match(Set dst (ReplicateL src)); 3307 effect(TEMP dst, USE src, TEMP tmp); 3308 format %{ "movdl $dst,$src.lo\n\t" 3309 "movdl $tmp,$src.hi\n\t" 3310 "punpckldq $dst,$tmp\n\t" 3311 "punpcklqdq $dst,$dst\n\t" 3312 "vinserti128_high $dst,$dst\t! replicate4L" %} 3313 ins_encode %{ 3314 __ movdl($dst$$XMMRegister, $src$$Register); 3315 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3316 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3317 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3318 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3319 %} 3320 ins_pipe( pipe_slow ); 3321 %} 3322 #endif // _LP64 3323 3324 instruct Repl4L_imm(vecY dst, immL con) %{ 3325 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3326 match(Set dst (ReplicateL con)); 3327 format %{ "movq $dst,[$constantaddress]\n\t" 3328 "punpcklqdq $dst,$dst\n\t" 3329 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3330 ins_encode %{ 3331 __ movq($dst$$XMMRegister, $constantaddress($con)); 3332 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3333 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3334 %} 3335 ins_pipe( pipe_slow ); 3336 %} 3337 3338 instruct Repl4L_mem(vecY dst, memory mem) %{ 3339 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3340 match(Set dst (ReplicateL (LoadL mem))); 3341 format %{ "movq $dst,$mem\n\t" 3342 "punpcklqdq $dst,$dst\n\t" 3343 "vinserti128_high $dst,$dst\t! replicate4L" %} 3344 ins_encode %{ 3345 __ movq($dst$$XMMRegister, $mem$$Address); 3346 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3347 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3348 %} 3349 ins_pipe( pipe_slow ); 3350 %} 3351 3352 instruct Repl2F_mem(vecD dst, memory mem) %{ 3353 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3354 match(Set dst (ReplicateF (LoadF mem))); 3355 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3356 ins_encode %{ 3357 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3358 %} 3359 ins_pipe( pipe_slow ); 3360 %} 3361 3362 instruct Repl4F_mem(vecX dst, memory mem) %{ 3363 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3364 match(Set dst (ReplicateF (LoadF mem))); 3365 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3366 ins_encode %{ 3367 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3368 %} 3369 ins_pipe( pipe_slow ); 3370 %} 3371 3372 instruct Repl8F(vecY dst, regF src) %{ 3373 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3374 match(Set dst (ReplicateF src)); 3375 format %{ "pshufd $dst,$src,0x00\n\t" 3376 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3377 ins_encode %{ 3378 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3379 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3380 %} 3381 ins_pipe( pipe_slow ); 3382 %} 3383 3384 instruct Repl8F_mem(vecY dst, memory mem) %{ 3385 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3386 match(Set dst (ReplicateF (LoadF mem))); 3387 format %{ "pshufd $dst,$mem,0x00\n\t" 3388 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3389 ins_encode %{ 3390 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3391 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3392 %} 3393 ins_pipe( pipe_slow ); 3394 %} 3395 3396 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3397 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3398 match(Set dst (ReplicateF zero)); 3399 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3400 ins_encode %{ 3401 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3402 %} 3403 ins_pipe( fpu_reg_reg ); 3404 %} 3405 3406 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3407 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3408 match(Set dst (ReplicateF zero)); 3409 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3410 ins_encode %{ 3411 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3412 %} 3413 ins_pipe( fpu_reg_reg ); 3414 %} 3415 3416 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3417 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3418 match(Set dst (ReplicateF zero)); 3419 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3420 ins_encode %{ 3421 int vector_len = 1; 3422 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3423 %} 3424 ins_pipe( fpu_reg_reg ); 3425 %} 3426 3427 instruct Repl2D_mem(vecX dst, memory mem) %{ 3428 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3429 match(Set dst (ReplicateD (LoadD mem))); 3430 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3431 ins_encode %{ 3432 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3433 %} 3434 ins_pipe( pipe_slow ); 3435 %} 3436 3437 instruct Repl4D(vecY dst, regD src) %{ 3438 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3439 match(Set dst (ReplicateD src)); 3440 format %{ "pshufd $dst,$src,0x44\n\t" 3441 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3442 ins_encode %{ 3443 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3444 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3445 %} 3446 ins_pipe( pipe_slow ); 3447 %} 3448 3449 instruct Repl4D_mem(vecY dst, memory mem) %{ 3450 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3451 match(Set dst (ReplicateD (LoadD mem))); 3452 format %{ "pshufd $dst,$mem,0x44\n\t" 3453 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3454 ins_encode %{ 3455 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3456 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3457 %} 3458 ins_pipe( pipe_slow ); 3459 %} 3460 3461 // Replicate double (8 byte) scalar zero to be vector 3462 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3463 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3464 match(Set dst (ReplicateD zero)); 3465 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3466 ins_encode %{ 3467 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3468 %} 3469 ins_pipe( fpu_reg_reg ); 3470 %} 3471 3472 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3473 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3474 match(Set dst (ReplicateD zero)); 3475 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3476 ins_encode %{ 3477 int vector_len = 1; 3478 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3479 %} 3480 ins_pipe( fpu_reg_reg ); 3481 %} 3482 3483 // ====================GENERIC REPLICATE========================================== 3484 3485 // Replicate byte scalar to be vector 3486 instruct Repl4B(vecS dst, rRegI src) %{ 3487 predicate(n->as_Vector()->length() == 4); 3488 match(Set dst (ReplicateB src)); 3489 format %{ "movd $dst,$src\n\t" 3490 "punpcklbw $dst,$dst\n\t" 3491 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3492 ins_encode %{ 3493 __ movdl($dst$$XMMRegister, $src$$Register); 3494 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3495 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3496 %} 3497 ins_pipe( pipe_slow ); 3498 %} 3499 3500 instruct Repl8B(vecD dst, rRegI src) %{ 3501 predicate(n->as_Vector()->length() == 8); 3502 match(Set dst (ReplicateB src)); 3503 format %{ "movd $dst,$src\n\t" 3504 "punpcklbw $dst,$dst\n\t" 3505 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3506 ins_encode %{ 3507 __ movdl($dst$$XMMRegister, $src$$Register); 3508 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3509 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3510 %} 3511 ins_pipe( pipe_slow ); 3512 %} 3513 3514 // Replicate byte scalar immediate to be vector by loading from const table. 3515 instruct Repl4B_imm(vecS dst, immI con) %{ 3516 predicate(n->as_Vector()->length() == 4); 3517 match(Set dst (ReplicateB con)); 3518 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3519 ins_encode %{ 3520 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3521 %} 3522 ins_pipe( pipe_slow ); 3523 %} 3524 3525 instruct Repl8B_imm(vecD dst, immI con) %{ 3526 predicate(n->as_Vector()->length() == 8); 3527 match(Set dst (ReplicateB con)); 3528 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3529 ins_encode %{ 3530 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3531 %} 3532 ins_pipe( pipe_slow ); 3533 %} 3534 3535 // Replicate byte scalar zero to be vector 3536 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3537 predicate(n->as_Vector()->length() == 4); 3538 match(Set dst (ReplicateB zero)); 3539 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3540 ins_encode %{ 3541 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3542 %} 3543 ins_pipe( fpu_reg_reg ); 3544 %} 3545 3546 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3547 predicate(n->as_Vector()->length() == 8); 3548 match(Set dst (ReplicateB zero)); 3549 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3550 ins_encode %{ 3551 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3552 %} 3553 ins_pipe( fpu_reg_reg ); 3554 %} 3555 3556 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3557 predicate(n->as_Vector()->length() == 16); 3558 match(Set dst (ReplicateB zero)); 3559 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3560 ins_encode %{ 3561 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3562 %} 3563 ins_pipe( fpu_reg_reg ); 3564 %} 3565 3566 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3567 predicate(n->as_Vector()->length() == 32); 3568 match(Set dst (ReplicateB zero)); 3569 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3570 ins_encode %{ 3571 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3572 int vector_len = 1; 3573 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3574 %} 3575 ins_pipe( fpu_reg_reg ); 3576 %} 3577 3578 // Replicate char/short (2 byte) scalar to be vector 3579 instruct Repl2S(vecS dst, rRegI src) %{ 3580 predicate(n->as_Vector()->length() == 2); 3581 match(Set dst (ReplicateS src)); 3582 format %{ "movd $dst,$src\n\t" 3583 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3584 ins_encode %{ 3585 __ movdl($dst$$XMMRegister, $src$$Register); 3586 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3587 %} 3588 ins_pipe( fpu_reg_reg ); 3589 %} 3590 3591 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3592 instruct Repl2S_imm(vecS dst, immI con) %{ 3593 predicate(n->as_Vector()->length() == 2); 3594 match(Set dst (ReplicateS con)); 3595 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3596 ins_encode %{ 3597 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3598 %} 3599 ins_pipe( fpu_reg_reg ); 3600 %} 3601 3602 instruct Repl4S_imm(vecD dst, immI con) %{ 3603 predicate(n->as_Vector()->length() == 4); 3604 match(Set dst (ReplicateS con)); 3605 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3606 ins_encode %{ 3607 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3608 %} 3609 ins_pipe( fpu_reg_reg ); 3610 %} 3611 3612 // Replicate char/short (2 byte) scalar zero to be vector 3613 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3614 predicate(n->as_Vector()->length() == 2); 3615 match(Set dst (ReplicateS zero)); 3616 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3617 ins_encode %{ 3618 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3619 %} 3620 ins_pipe( fpu_reg_reg ); 3621 %} 3622 3623 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3624 predicate(n->as_Vector()->length() == 4); 3625 match(Set dst (ReplicateS zero)); 3626 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3627 ins_encode %{ 3628 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3629 %} 3630 ins_pipe( fpu_reg_reg ); 3631 %} 3632 3633 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3634 predicate(n->as_Vector()->length() == 8); 3635 match(Set dst (ReplicateS zero)); 3636 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3637 ins_encode %{ 3638 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3639 %} 3640 ins_pipe( fpu_reg_reg ); 3641 %} 3642 3643 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3644 predicate(n->as_Vector()->length() == 16); 3645 match(Set dst (ReplicateS zero)); 3646 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3647 ins_encode %{ 3648 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3649 int vector_len = 1; 3650 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3651 %} 3652 ins_pipe( fpu_reg_reg ); 3653 %} 3654 3655 // Replicate integer (4 byte) scalar to be vector 3656 instruct Repl2I(vecD dst, rRegI src) %{ 3657 predicate(n->as_Vector()->length() == 2); 3658 match(Set dst (ReplicateI src)); 3659 format %{ "movd $dst,$src\n\t" 3660 "pshufd $dst,$dst,0x00\t! replicate2I" %} 3661 ins_encode %{ 3662 __ movdl($dst$$XMMRegister, $src$$Register); 3663 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3664 %} 3665 ins_pipe( fpu_reg_reg ); 3666 %} 3667 3668 // Integer could be loaded into xmm register directly from memory. 3669 instruct Repl2I_mem(vecD dst, memory mem) %{ 3670 predicate(n->as_Vector()->length() == 2); 3671 match(Set dst (ReplicateI (LoadI mem))); 3672 format %{ "movd $dst,$mem\n\t" 3673 "pshufd $dst,$dst,0x00\t! replicate2I" %} 3674 ins_encode %{ 3675 __ movdl($dst$$XMMRegister, $mem$$Address); 3676 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3677 %} 3678 ins_pipe( fpu_reg_reg ); 3679 %} 3680 3681 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 3682 instruct Repl2I_imm(vecD dst, immI con) %{ 3683 predicate(n->as_Vector()->length() == 2); 3684 match(Set dst (ReplicateI con)); 3685 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 3686 ins_encode %{ 3687 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3688 %} 3689 ins_pipe( fpu_reg_reg ); 3690 %} 3691 3692 // Replicate integer (4 byte) scalar zero to be vector 3693 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 3694 predicate(n->as_Vector()->length() == 2); 3695 match(Set dst (ReplicateI zero)); 3696 format %{ "pxor $dst,$dst\t! replicate2I" %} 3697 ins_encode %{ 3698 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3699 %} 3700 ins_pipe( fpu_reg_reg ); 3701 %} 3702 3703 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 3704 predicate(n->as_Vector()->length() == 4); 3705 match(Set dst (ReplicateI zero)); 3706 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 3707 ins_encode %{ 3708 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3709 %} 3710 ins_pipe( fpu_reg_reg ); 3711 %} 3712 3713 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 3714 predicate(n->as_Vector()->length() == 8); 3715 match(Set dst (ReplicateI zero)); 3716 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 3717 ins_encode %{ 3718 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3719 int vector_len = 1; 3720 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3721 %} 3722 ins_pipe( fpu_reg_reg ); 3723 %} 3724 3725 // Replicate long (8 byte) scalar to be vector 3726 #ifdef _LP64 3727 instruct Repl2L(vecX dst, rRegL src) %{ 3728 predicate(n->as_Vector()->length() == 2); 3729 match(Set dst (ReplicateL src)); 3730 format %{ "movdq $dst,$src\n\t" 3731 "punpcklqdq $dst,$dst\t! replicate2L" %} 3732 ins_encode %{ 3733 __ movdq($dst$$XMMRegister, $src$$Register); 3734 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3735 %} 3736 ins_pipe( pipe_slow ); 3737 %} 3738 #else // _LP64 3739 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 3740 predicate(n->as_Vector()->length() == 2); 3741 match(Set dst (ReplicateL src)); 3742 effect(TEMP dst, USE src, TEMP tmp); 3743 format %{ "movdl $dst,$src.lo\n\t" 3744 "movdl $tmp,$src.hi\n\t" 3745 "punpckldq $dst,$tmp\n\t" 3746 "punpcklqdq $dst,$dst\t! replicate2L"%} 3747 ins_encode %{ 3748 __ movdl($dst$$XMMRegister, $src$$Register); 3749 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3750 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3751 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3752 %} 3753 ins_pipe( pipe_slow ); 3754 %} 3755 #endif // _LP64 3756 3757 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3758 instruct Repl2L_imm(vecX dst, immL con) %{ 3759 predicate(n->as_Vector()->length() == 2); 3760 match(Set dst (ReplicateL con)); 3761 format %{ "movq $dst,[$constantaddress]\n\t" 3762 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 3763 ins_encode %{ 3764 __ movq($dst$$XMMRegister, $constantaddress($con)); 3765 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3766 %} 3767 ins_pipe( pipe_slow ); 3768 %} 3769 3770 // Replicate long (8 byte) scalar zero to be vector 3771 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3772 predicate(n->as_Vector()->length() == 2); 3773 match(Set dst (ReplicateL zero)); 3774 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3775 ins_encode %{ 3776 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3777 %} 3778 ins_pipe( fpu_reg_reg ); 3779 %} 3780 3781 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3782 predicate(n->as_Vector()->length() == 4); 3783 match(Set dst (ReplicateL zero)); 3784 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 3785 ins_encode %{ 3786 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3787 int vector_len = 1; 3788 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3789 %} 3790 ins_pipe( fpu_reg_reg ); 3791 %} 3792 3793 // Replicate float (4 byte) scalar to be vector 3794 instruct Repl2F(vecD dst, regF src) %{ 3795 predicate(n->as_Vector()->length() == 2); 3796 match(Set dst (ReplicateF src)); 3797 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 3798 ins_encode %{ 3799 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3800 %} 3801 ins_pipe( fpu_reg_reg ); 3802 %} 3803 3804 instruct Repl4F(vecX dst, regF src) %{ 3805 predicate(n->as_Vector()->length() == 4); 3806 match(Set dst (ReplicateF src)); 3807 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 3808 ins_encode %{ 3809 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3810 %} 3811 ins_pipe( pipe_slow ); 3812 %} 3813 3814 // Replicate double (8 bytes) scalar to be vector 3815 instruct Repl2D(vecX dst, regD src) %{ 3816 predicate(n->as_Vector()->length() == 2); 3817 match(Set dst (ReplicateD src)); 3818 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 3819 ins_encode %{ 3820 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3821 %} 3822 ins_pipe( pipe_slow ); 3823 %} 3824 3825 // ====================EVEX REPLICATE============================================= 3826 3827 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 3828 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3829 match(Set dst (ReplicateB (LoadB mem))); 3830 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 3831 ins_encode %{ 3832 int vector_len = 0; 3833 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3834 %} 3835 ins_pipe( pipe_slow ); 3836 %} 3837 3838 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 3839 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3840 match(Set dst (ReplicateB (LoadB mem))); 3841 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 3842 ins_encode %{ 3843 int vector_len = 0; 3844 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3845 %} 3846 ins_pipe( pipe_slow ); 3847 %} 3848 3849 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3850 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3851 match(Set dst (ReplicateB src)); 3852 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3853 ins_encode %{ 3854 int vector_len = 0; 3855 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3856 %} 3857 ins_pipe( pipe_slow ); 3858 %} 3859 3860 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3861 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3862 match(Set dst (ReplicateB (LoadB mem))); 3863 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 3864 ins_encode %{ 3865 int vector_len = 0; 3866 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3867 %} 3868 ins_pipe( pipe_slow ); 3869 %} 3870 3871 instruct Repl32B_evex(vecY dst, rRegI src) %{ 3872 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3873 match(Set dst (ReplicateB src)); 3874 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 3875 ins_encode %{ 3876 int vector_len = 1; 3877 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3878 %} 3879 ins_pipe( pipe_slow ); 3880 %} 3881 3882 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 3883 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3884 match(Set dst (ReplicateB (LoadB mem))); 3885 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 3886 ins_encode %{ 3887 int vector_len = 1; 3888 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3889 %} 3890 ins_pipe( pipe_slow ); 3891 %} 3892 3893 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3894 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3895 match(Set dst (ReplicateB src)); 3896 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 3897 ins_encode %{ 3898 int vector_len = 2; 3899 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3900 %} 3901 ins_pipe( pipe_slow ); 3902 %} 3903 3904 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3905 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3906 match(Set dst (ReplicateB (LoadB mem))); 3907 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3908 ins_encode %{ 3909 int vector_len = 2; 3910 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3911 %} 3912 ins_pipe( pipe_slow ); 3913 %} 3914 3915 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3916 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3917 match(Set dst (ReplicateB con)); 3918 format %{ "movq $dst,[$constantaddress]\n\t" 3919 "vpbroadcastb $dst,$dst\t! replicate16B" %} 3920 ins_encode %{ 3921 int vector_len = 0; 3922 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3923 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3924 %} 3925 ins_pipe( pipe_slow ); 3926 %} 3927 3928 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3929 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3930 match(Set dst (ReplicateB con)); 3931 format %{ "movq $dst,[$constantaddress]\n\t" 3932 "vpbroadcastb $dst,$dst\t! replicate32B" %} 3933 ins_encode %{ 3934 int vector_len = 1; 3935 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3936 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3937 %} 3938 ins_pipe( pipe_slow ); 3939 %} 3940 3941 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 3942 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3943 match(Set dst (ReplicateB con)); 3944 format %{ "movq $dst,[$constantaddress]\n\t" 3945 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 3946 ins_encode %{ 3947 int vector_len = 2; 3948 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3949 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3950 %} 3951 ins_pipe( pipe_slow ); 3952 %} 3953 3954 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 3955 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3956 match(Set dst (ReplicateB zero)); 3957 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 3958 ins_encode %{ 3959 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3960 int vector_len = 2; 3961 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3962 %} 3963 ins_pipe( fpu_reg_reg ); 3964 %} 3965 3966 instruct Repl4S_evex(vecD dst, rRegI src) %{ 3967 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3968 match(Set dst (ReplicateS src)); 3969 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 3970 ins_encode %{ 3971 int vector_len = 0; 3972 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3973 %} 3974 ins_pipe( pipe_slow ); 3975 %} 3976 3977 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 3978 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3979 match(Set dst (ReplicateS (LoadS mem))); 3980 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 3981 ins_encode %{ 3982 int vector_len = 0; 3983 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3984 %} 3985 ins_pipe( pipe_slow ); 3986 %} 3987 3988 instruct Repl8S_evex(vecX dst, rRegI src) %{ 3989 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3990 match(Set dst (ReplicateS src)); 3991 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 3992 ins_encode %{ 3993 int vector_len = 0; 3994 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3995 %} 3996 ins_pipe( pipe_slow ); 3997 %} 3998 3999 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4000 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4001 match(Set dst (ReplicateS (LoadS mem))); 4002 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4003 ins_encode %{ 4004 int vector_len = 0; 4005 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4006 %} 4007 ins_pipe( pipe_slow ); 4008 %} 4009 4010 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4011 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4012 match(Set dst (ReplicateS src)); 4013 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4014 ins_encode %{ 4015 int vector_len = 1; 4016 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4017 %} 4018 ins_pipe( pipe_slow ); 4019 %} 4020 4021 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4022 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4023 match(Set dst (ReplicateS (LoadS mem))); 4024 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4025 ins_encode %{ 4026 int vector_len = 1; 4027 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4028 %} 4029 ins_pipe( pipe_slow ); 4030 %} 4031 4032 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4033 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4034 match(Set dst (ReplicateS src)); 4035 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4036 ins_encode %{ 4037 int vector_len = 2; 4038 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4039 %} 4040 ins_pipe( pipe_slow ); 4041 %} 4042 4043 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4044 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4045 match(Set dst (ReplicateS (LoadS mem))); 4046 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4047 ins_encode %{ 4048 int vector_len = 2; 4049 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4050 %} 4051 ins_pipe( pipe_slow ); 4052 %} 4053 4054 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4055 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4056 match(Set dst (ReplicateS con)); 4057 format %{ "movq $dst,[$constantaddress]\n\t" 4058 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4059 ins_encode %{ 4060 int vector_len = 0; 4061 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4062 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4063 %} 4064 ins_pipe( pipe_slow ); 4065 %} 4066 4067 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4068 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4069 match(Set dst (ReplicateS con)); 4070 format %{ "movq $dst,[$constantaddress]\n\t" 4071 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4072 ins_encode %{ 4073 int vector_len = 1; 4074 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4075 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4076 %} 4077 ins_pipe( pipe_slow ); 4078 %} 4079 4080 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4081 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4082 match(Set dst (ReplicateS con)); 4083 format %{ "movq $dst,[$constantaddress]\n\t" 4084 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4085 ins_encode %{ 4086 int vector_len = 2; 4087 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4088 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4089 %} 4090 ins_pipe( pipe_slow ); 4091 %} 4092 4093 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4094 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4095 match(Set dst (ReplicateS zero)); 4096 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4097 ins_encode %{ 4098 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4099 int vector_len = 2; 4100 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4101 %} 4102 ins_pipe( fpu_reg_reg ); 4103 %} 4104 4105 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4106 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4107 match(Set dst (ReplicateI src)); 4108 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 4109 ins_encode %{ 4110 int vector_len = 0; 4111 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4112 %} 4113 ins_pipe( pipe_slow ); 4114 %} 4115 4116 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4117 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4118 match(Set dst (ReplicateI (LoadI mem))); 4119 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4120 ins_encode %{ 4121 int vector_len = 0; 4122 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4123 %} 4124 ins_pipe( pipe_slow ); 4125 %} 4126 4127 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4128 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4129 match(Set dst (ReplicateI src)); 4130 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4131 ins_encode %{ 4132 int vector_len = 1; 4133 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4134 %} 4135 ins_pipe( pipe_slow ); 4136 %} 4137 4138 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4139 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4140 match(Set dst (ReplicateI (LoadI mem))); 4141 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4142 ins_encode %{ 4143 int vector_len = 1; 4144 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4145 %} 4146 ins_pipe( pipe_slow ); 4147 %} 4148 4149 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4150 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4151 match(Set dst (ReplicateI src)); 4152 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4153 ins_encode %{ 4154 int vector_len = 2; 4155 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4156 %} 4157 ins_pipe( pipe_slow ); 4158 %} 4159 4160 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4161 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4162 match(Set dst (ReplicateI (LoadI mem))); 4163 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4164 ins_encode %{ 4165 int vector_len = 2; 4166 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4167 %} 4168 ins_pipe( pipe_slow ); 4169 %} 4170 4171 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4172 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4173 match(Set dst (ReplicateI con)); 4174 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4175 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4176 ins_encode %{ 4177 int vector_len = 0; 4178 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4179 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4180 %} 4181 ins_pipe( pipe_slow ); 4182 %} 4183 4184 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4185 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4186 match(Set dst (ReplicateI con)); 4187 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4188 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4189 ins_encode %{ 4190 int vector_len = 1; 4191 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4192 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4193 %} 4194 ins_pipe( pipe_slow ); 4195 %} 4196 4197 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4198 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4199 match(Set dst (ReplicateI con)); 4200 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4201 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4202 ins_encode %{ 4203 int vector_len = 2; 4204 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4205 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4206 %} 4207 ins_pipe( pipe_slow ); 4208 %} 4209 4210 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4211 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4212 match(Set dst (ReplicateI zero)); 4213 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4214 ins_encode %{ 4215 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4216 int vector_len = 2; 4217 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4218 %} 4219 ins_pipe( fpu_reg_reg ); 4220 %} 4221 4222 // Replicate long (8 byte) scalar to be vector 4223 #ifdef _LP64 4224 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4225 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4226 match(Set dst (ReplicateL src)); 4227 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4228 ins_encode %{ 4229 int vector_len = 1; 4230 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4231 %} 4232 ins_pipe( pipe_slow ); 4233 %} 4234 4235 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4236 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4237 match(Set dst (ReplicateL src)); 4238 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4239 ins_encode %{ 4240 int vector_len = 2; 4241 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4242 %} 4243 ins_pipe( pipe_slow ); 4244 %} 4245 #else // _LP64 4246 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4247 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4248 match(Set dst (ReplicateL src)); 4249 effect(TEMP dst, USE src, TEMP tmp); 4250 format %{ "movdl $dst,$src.lo\n\t" 4251 "movdl $tmp,$src.hi\n\t" 4252 "punpckldq $dst,$tmp\n\t" 4253 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4254 ins_encode %{ 4255 int vector_len = 1; 4256 __ movdl($dst$$XMMRegister, $src$$Register); 4257 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4258 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4259 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4260 %} 4261 ins_pipe( pipe_slow ); 4262 %} 4263 4264 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4265 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4266 match(Set dst (ReplicateL src)); 4267 effect(TEMP dst, USE src, TEMP tmp); 4268 format %{ "movdl $dst,$src.lo\n\t" 4269 "movdl $tmp,$src.hi\n\t" 4270 "punpckldq $dst,$tmp\n\t" 4271 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4272 ins_encode %{ 4273 int vector_len = 2; 4274 __ movdl($dst$$XMMRegister, $src$$Register); 4275 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4276 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4277 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4278 %} 4279 ins_pipe( pipe_slow ); 4280 %} 4281 #endif // _LP64 4282 4283 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4284 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4285 match(Set dst (ReplicateL con)); 4286 format %{ "movq $dst,[$constantaddress]\n\t" 4287 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4288 ins_encode %{ 4289 int vector_len = 1; 4290 __ movq($dst$$XMMRegister, $constantaddress($con)); 4291 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4292 %} 4293 ins_pipe( pipe_slow ); 4294 %} 4295 4296 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4297 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4298 match(Set dst (ReplicateL con)); 4299 format %{ "movq $dst,[$constantaddress]\n\t" 4300 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4301 ins_encode %{ 4302 int vector_len = 2; 4303 __ movq($dst$$XMMRegister, $constantaddress($con)); 4304 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4305 %} 4306 ins_pipe( pipe_slow ); 4307 %} 4308 4309 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4310 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4311 match(Set dst (ReplicateL (LoadL mem))); 4312 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4313 ins_encode %{ 4314 int vector_len = 0; 4315 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4316 %} 4317 ins_pipe( pipe_slow ); 4318 %} 4319 4320 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4321 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4322 match(Set dst (ReplicateL (LoadL mem))); 4323 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4324 ins_encode %{ 4325 int vector_len = 1; 4326 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4327 %} 4328 ins_pipe( pipe_slow ); 4329 %} 4330 4331 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4332 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4333 match(Set dst (ReplicateL (LoadL mem))); 4334 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4335 ins_encode %{ 4336 int vector_len = 2; 4337 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4338 %} 4339 ins_pipe( pipe_slow ); 4340 %} 4341 4342 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4343 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4344 match(Set dst (ReplicateL zero)); 4345 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4346 ins_encode %{ 4347 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4348 int vector_len = 2; 4349 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4350 %} 4351 ins_pipe( fpu_reg_reg ); 4352 %} 4353 4354 instruct Repl8F_evex(vecY dst, regF src) %{ 4355 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4356 match(Set dst (ReplicateF src)); 4357 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4358 ins_encode %{ 4359 int vector_len = 1; 4360 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4361 %} 4362 ins_pipe( pipe_slow ); 4363 %} 4364 4365 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4366 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4367 match(Set dst (ReplicateF (LoadF mem))); 4368 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4369 ins_encode %{ 4370 int vector_len = 1; 4371 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4372 %} 4373 ins_pipe( pipe_slow ); 4374 %} 4375 4376 instruct Repl16F_evex(vecZ dst, regF src) %{ 4377 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4378 match(Set dst (ReplicateF src)); 4379 format %{ "vbroadcastss $dst,$src\t! replicate16F" %} 4380 ins_encode %{ 4381 int vector_len = 2; 4382 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4383 %} 4384 ins_pipe( pipe_slow ); 4385 %} 4386 4387 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4388 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4389 match(Set dst (ReplicateF (LoadF mem))); 4390 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4391 ins_encode %{ 4392 int vector_len = 2; 4393 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4394 %} 4395 ins_pipe( pipe_slow ); 4396 %} 4397 4398 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4399 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4400 match(Set dst (ReplicateF zero)); 4401 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4402 ins_encode %{ 4403 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4404 int vector_len = 2; 4405 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4406 %} 4407 ins_pipe( fpu_reg_reg ); 4408 %} 4409 4410 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4411 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4412 match(Set dst (ReplicateF zero)); 4413 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4414 ins_encode %{ 4415 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4416 int vector_len = 2; 4417 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4418 %} 4419 ins_pipe( fpu_reg_reg ); 4420 %} 4421 4422 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4423 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4424 match(Set dst (ReplicateF zero)); 4425 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4426 ins_encode %{ 4427 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4428 int vector_len = 2; 4429 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4430 %} 4431 ins_pipe( fpu_reg_reg ); 4432 %} 4433 4434 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4435 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4436 match(Set dst (ReplicateF zero)); 4437 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4438 ins_encode %{ 4439 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4440 int vector_len = 2; 4441 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4442 %} 4443 ins_pipe( fpu_reg_reg ); 4444 %} 4445 4446 instruct Repl4D_evex(vecY dst, regD src) %{ 4447 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4448 match(Set dst (ReplicateD src)); 4449 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4450 ins_encode %{ 4451 int vector_len = 1; 4452 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4453 %} 4454 ins_pipe( pipe_slow ); 4455 %} 4456 4457 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4458 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4459 match(Set dst (ReplicateD (LoadD mem))); 4460 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4461 ins_encode %{ 4462 int vector_len = 1; 4463 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4464 %} 4465 ins_pipe( pipe_slow ); 4466 %} 4467 4468 instruct Repl8D_evex(vecZ dst, regD src) %{ 4469 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4470 match(Set dst (ReplicateD src)); 4471 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4472 ins_encode %{ 4473 int vector_len = 2; 4474 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4475 %} 4476 ins_pipe( pipe_slow ); 4477 %} 4478 4479 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4480 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4481 match(Set dst (ReplicateD (LoadD mem))); 4482 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4483 ins_encode %{ 4484 int vector_len = 2; 4485 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4486 %} 4487 ins_pipe( pipe_slow ); 4488 %} 4489 4490 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4491 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4492 match(Set dst (ReplicateD zero)); 4493 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4494 ins_encode %{ 4495 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4496 int vector_len = 2; 4497 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4498 %} 4499 ins_pipe( fpu_reg_reg ); 4500 %} 4501 4502 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4503 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4504 match(Set dst (ReplicateD zero)); 4505 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4506 ins_encode %{ 4507 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4508 int vector_len = 2; 4509 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4510 %} 4511 ins_pipe( fpu_reg_reg ); 4512 %} 4513 4514 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4515 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4516 match(Set dst (ReplicateD zero)); 4517 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4518 ins_encode %{ 4519 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4520 int vector_len = 2; 4521 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4522 %} 4523 ins_pipe( fpu_reg_reg ); 4524 %} 4525 4526 // ====================REDUCTION ARITHMETIC======================================= 4527 4528 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4529 predicate(UseSSE > 2 && UseAVX == 0); 4530 match(Set dst (AddReductionVI src1 src2)); 4531 effect(TEMP tmp2, TEMP tmp); 4532 format %{ "movdqu $tmp2,$src2\n\t" 4533 "phaddd $tmp2,$tmp2\n\t" 4534 "movd $tmp,$src1\n\t" 4535 "paddd $tmp,$tmp2\n\t" 4536 "movd $dst,$tmp\t! add reduction2I" %} 4537 ins_encode %{ 4538 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4539 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4540 __ movdl($tmp$$XMMRegister, $src1$$Register); 4541 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4542 __ movdl($dst$$Register, $tmp$$XMMRegister); 4543 %} 4544 ins_pipe( pipe_slow ); 4545 %} 4546 4547 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4548 predicate(VM_Version::supports_avxonly()); 4549 match(Set dst (AddReductionVI src1 src2)); 4550 effect(TEMP tmp, TEMP tmp2); 4551 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4552 "movd $tmp2,$src1\n\t" 4553 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4554 "movd $dst,$tmp2\t! add reduction2I" %} 4555 ins_encode %{ 4556 int vector_len = 0; 4557 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4558 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4559 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4560 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4561 %} 4562 ins_pipe( pipe_slow ); 4563 %} 4564 4565 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4566 predicate(UseAVX > 2); 4567 match(Set dst (AddReductionVI src1 src2)); 4568 effect(TEMP tmp, TEMP tmp2); 4569 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4570 "vpaddd $tmp,$src2,$tmp2\n\t" 4571 "movd $tmp2,$src1\n\t" 4572 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4573 "movd $dst,$tmp2\t! add reduction2I" %} 4574 ins_encode %{ 4575 int vector_len = 0; 4576 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4577 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4578 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4579 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4580 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4581 %} 4582 ins_pipe( pipe_slow ); 4583 %} 4584 4585 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4586 predicate(UseSSE > 2 && UseAVX == 0); 4587 match(Set dst (AddReductionVI src1 src2)); 4588 effect(TEMP tmp, TEMP tmp2); 4589 format %{ "movdqu $tmp,$src2\n\t" 4590 "phaddd $tmp,$tmp\n\t" 4591 "phaddd $tmp,$tmp\n\t" 4592 "movd $tmp2,$src1\n\t" 4593 "paddd $tmp2,$tmp\n\t" 4594 "movd $dst,$tmp2\t! add reduction4I" %} 4595 ins_encode %{ 4596 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4597 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4598 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4599 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4600 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4601 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4602 %} 4603 ins_pipe( pipe_slow ); 4604 %} 4605 4606 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4607 predicate(VM_Version::supports_avxonly()); 4608 match(Set dst (AddReductionVI src1 src2)); 4609 effect(TEMP tmp, TEMP tmp2); 4610 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4611 "vphaddd $tmp,$tmp,$tmp\n\t" 4612 "movd $tmp2,$src1\n\t" 4613 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4614 "movd $dst,$tmp2\t! add reduction4I" %} 4615 ins_encode %{ 4616 int vector_len = 0; 4617 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4618 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4619 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4620 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4621 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4622 %} 4623 ins_pipe( pipe_slow ); 4624 %} 4625 4626 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4627 predicate(UseAVX > 2); 4628 match(Set dst (AddReductionVI src1 src2)); 4629 effect(TEMP tmp, TEMP tmp2); 4630 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4631 "vpaddd $tmp,$src2,$tmp2\n\t" 4632 "pshufd $tmp2,$tmp,0x1\n\t" 4633 "vpaddd $tmp,$tmp,$tmp2\n\t" 4634 "movd $tmp2,$src1\n\t" 4635 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4636 "movd $dst,$tmp2\t! add reduction4I" %} 4637 ins_encode %{ 4638 int vector_len = 0; 4639 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4640 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4641 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4642 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4643 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4644 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4645 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4646 %} 4647 ins_pipe( pipe_slow ); 4648 %} 4649 4650 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4651 predicate(VM_Version::supports_avxonly()); 4652 match(Set dst (AddReductionVI src1 src2)); 4653 effect(TEMP tmp, TEMP tmp2); 4654 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4655 "vphaddd $tmp,$tmp,$tmp2\n\t" 4656 "vextracti128_high $tmp2,$tmp\n\t" 4657 "vpaddd $tmp,$tmp,$tmp2\n\t" 4658 "movd $tmp2,$src1\n\t" 4659 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4660 "movd $dst,$tmp2\t! add reduction8I" %} 4661 ins_encode %{ 4662 int vector_len = 1; 4663 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4664 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4665 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4666 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4667 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4668 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4669 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4670 %} 4671 ins_pipe( pipe_slow ); 4672 %} 4673 4674 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4675 predicate(UseAVX > 2); 4676 match(Set dst (AddReductionVI src1 src2)); 4677 effect(TEMP tmp, TEMP tmp2); 4678 format %{ "vextracti128_high $tmp,$src2\n\t" 4679 "vpaddd $tmp,$tmp,$src2\n\t" 4680 "pshufd $tmp2,$tmp,0xE\n\t" 4681 "vpaddd $tmp,$tmp,$tmp2\n\t" 4682 "pshufd $tmp2,$tmp,0x1\n\t" 4683 "vpaddd $tmp,$tmp,$tmp2\n\t" 4684 "movd $tmp2,$src1\n\t" 4685 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4686 "movd $dst,$tmp2\t! add reduction8I" %} 4687 ins_encode %{ 4688 int vector_len = 0; 4689 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4690 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4691 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4692 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4693 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4694 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4695 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4696 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4697 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4698 %} 4699 ins_pipe( pipe_slow ); 4700 %} 4701 4702 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4703 predicate(UseAVX > 2); 4704 match(Set dst (AddReductionVI src1 src2)); 4705 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4706 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4707 "vpaddd $tmp3,$tmp3,$src2\n\t" 4708 "vextracti128_high $tmp,$tmp3\n\t" 4709 "vpaddd $tmp,$tmp,$tmp3\n\t" 4710 "pshufd $tmp2,$tmp,0xE\n\t" 4711 "vpaddd $tmp,$tmp,$tmp2\n\t" 4712 "pshufd $tmp2,$tmp,0x1\n\t" 4713 "vpaddd $tmp,$tmp,$tmp2\n\t" 4714 "movd $tmp2,$src1\n\t" 4715 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4716 "movd $dst,$tmp2\t! mul reduction16I" %} 4717 ins_encode %{ 4718 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 4719 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4720 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 4721 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4722 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4723 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4724 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4725 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4726 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4727 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4728 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4729 %} 4730 ins_pipe( pipe_slow ); 4731 %} 4732 4733 #ifdef _LP64 4734 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4735 predicate(UseAVX > 2); 4736 match(Set dst (AddReductionVL src1 src2)); 4737 effect(TEMP tmp, TEMP tmp2); 4738 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4739 "vpaddq $tmp,$src2,$tmp2\n\t" 4740 "movdq $tmp2,$src1\n\t" 4741 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4742 "movdq $dst,$tmp2\t! add reduction2L" %} 4743 ins_encode %{ 4744 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4745 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4746 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4747 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4748 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4749 %} 4750 ins_pipe( pipe_slow ); 4751 %} 4752 4753 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4754 predicate(UseAVX > 2); 4755 match(Set dst (AddReductionVL src1 src2)); 4756 effect(TEMP tmp, TEMP tmp2); 4757 format %{ "vextracti128_high $tmp,$src2\n\t" 4758 "vpaddq $tmp2,$tmp,$src2\n\t" 4759 "pshufd $tmp,$tmp2,0xE\n\t" 4760 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4761 "movdq $tmp,$src1\n\t" 4762 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4763 "movdq $dst,$tmp2\t! add reduction4L" %} 4764 ins_encode %{ 4765 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4766 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4767 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4768 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4769 __ movdq($tmp$$XMMRegister, $src1$$Register); 4770 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4771 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4772 %} 4773 ins_pipe( pipe_slow ); 4774 %} 4775 4776 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4777 predicate(UseAVX > 2); 4778 match(Set dst (AddReductionVL src1 src2)); 4779 effect(TEMP tmp, TEMP tmp2); 4780 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 4781 "vpaddq $tmp2,$tmp2,$src2\n\t" 4782 "vextracti128_high $tmp,$tmp2\n\t" 4783 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4784 "pshufd $tmp,$tmp2,0xE\n\t" 4785 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4786 "movdq $tmp,$src1\n\t" 4787 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4788 "movdq $dst,$tmp2\t! add reduction8L" %} 4789 ins_encode %{ 4790 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4791 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4792 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 4793 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4794 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4795 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4796 __ movdq($tmp$$XMMRegister, $src1$$Register); 4797 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4798 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4799 %} 4800 ins_pipe( pipe_slow ); 4801 %} 4802 #endif 4803 4804 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4805 predicate(UseSSE >= 1 && UseAVX == 0); 4806 match(Set dst (AddReductionVF dst src2)); 4807 effect(TEMP dst, TEMP tmp); 4808 format %{ "addss $dst,$src2\n\t" 4809 "pshufd $tmp,$src2,0x01\n\t" 4810 "addss $dst,$tmp\t! add reduction2F" %} 4811 ins_encode %{ 4812 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4813 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4814 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4815 %} 4816 ins_pipe( pipe_slow ); 4817 %} 4818 4819 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4820 predicate(UseAVX > 0); 4821 match(Set dst (AddReductionVF dst src2)); 4822 effect(TEMP dst, TEMP tmp); 4823 format %{ "vaddss $dst,$dst,$src2\n\t" 4824 "pshufd $tmp,$src2,0x01\n\t" 4825 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 4826 ins_encode %{ 4827 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4828 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4829 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4830 %} 4831 ins_pipe( pipe_slow ); 4832 %} 4833 4834 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4835 predicate(UseSSE >= 1 && UseAVX == 0); 4836 match(Set dst (AddReductionVF dst src2)); 4837 effect(TEMP dst, TEMP tmp); 4838 format %{ "addss $dst,$src2\n\t" 4839 "pshufd $tmp,$src2,0x01\n\t" 4840 "addss $dst,$tmp\n\t" 4841 "pshufd $tmp,$src2,0x02\n\t" 4842 "addss $dst,$tmp\n\t" 4843 "pshufd $tmp,$src2,0x03\n\t" 4844 "addss $dst,$tmp\t! add reduction4F" %} 4845 ins_encode %{ 4846 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4847 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4848 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4849 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4850 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4851 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4852 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4853 %} 4854 ins_pipe( pipe_slow ); 4855 %} 4856 4857 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4858 predicate(UseAVX > 0); 4859 match(Set dst (AddReductionVF dst src2)); 4860 effect(TEMP tmp, TEMP dst); 4861 format %{ "vaddss $dst,dst,$src2\n\t" 4862 "pshufd $tmp,$src2,0x01\n\t" 4863 "vaddss $dst,$dst,$tmp\n\t" 4864 "pshufd $tmp,$src2,0x02\n\t" 4865 "vaddss $dst,$dst,$tmp\n\t" 4866 "pshufd $tmp,$src2,0x03\n\t" 4867 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 4868 ins_encode %{ 4869 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4870 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4871 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4872 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4873 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4874 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4875 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4876 %} 4877 ins_pipe( pipe_slow ); 4878 %} 4879 4880 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 4881 predicate(UseAVX > 0); 4882 match(Set dst (AddReductionVF dst src2)); 4883 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4884 format %{ "vaddss $dst,$dst,$src2\n\t" 4885 "pshufd $tmp,$src2,0x01\n\t" 4886 "vaddss $dst,$dst,$tmp\n\t" 4887 "pshufd $tmp,$src2,0x02\n\t" 4888 "vaddss $dst,$dst,$tmp\n\t" 4889 "pshufd $tmp,$src2,0x03\n\t" 4890 "vaddss $dst,$dst,$tmp\n\t" 4891 "vextractf128_high $tmp2,$src2\n\t" 4892 "vaddss $dst,$dst,$tmp2\n\t" 4893 "pshufd $tmp,$tmp2,0x01\n\t" 4894 "vaddss $dst,$dst,$tmp\n\t" 4895 "pshufd $tmp,$tmp2,0x02\n\t" 4896 "vaddss $dst,$dst,$tmp\n\t" 4897 "pshufd $tmp,$tmp2,0x03\n\t" 4898 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 4899 ins_encode %{ 4900 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4901 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4902 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4903 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4904 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4905 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4906 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4907 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4908 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4909 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4910 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4911 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4912 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4913 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4914 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4915 %} 4916 ins_pipe( pipe_slow ); 4917 %} 4918 4919 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 4920 predicate(UseAVX > 2); 4921 match(Set dst (AddReductionVF dst src2)); 4922 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4923 format %{ "vaddss $dst,$dst,$src2\n\t" 4924 "pshufd $tmp,$src2,0x01\n\t" 4925 "vaddss $dst,$dst,$tmp\n\t" 4926 "pshufd $tmp,$src2,0x02\n\t" 4927 "vaddss $dst,$dst,$tmp\n\t" 4928 "pshufd $tmp,$src2,0x03\n\t" 4929 "vaddss $dst,$dst,$tmp\n\t" 4930 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4931 "vaddss $dst,$dst,$tmp2\n\t" 4932 "pshufd $tmp,$tmp2,0x01\n\t" 4933 "vaddss $dst,$dst,$tmp\n\t" 4934 "pshufd $tmp,$tmp2,0x02\n\t" 4935 "vaddss $dst,$dst,$tmp\n\t" 4936 "pshufd $tmp,$tmp2,0x03\n\t" 4937 "vaddss $dst,$dst,$tmp\n\t" 4938 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4939 "vaddss $dst,$dst,$tmp2\n\t" 4940 "pshufd $tmp,$tmp2,0x01\n\t" 4941 "vaddss $dst,$dst,$tmp\n\t" 4942 "pshufd $tmp,$tmp2,0x02\n\t" 4943 "vaddss $dst,$dst,$tmp\n\t" 4944 "pshufd $tmp,$tmp2,0x03\n\t" 4945 "vaddss $dst,$dst,$tmp\n\t" 4946 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4947 "vaddss $dst,$dst,$tmp2\n\t" 4948 "pshufd $tmp,$tmp2,0x01\n\t" 4949 "vaddss $dst,$dst,$tmp\n\t" 4950 "pshufd $tmp,$tmp2,0x02\n\t" 4951 "vaddss $dst,$dst,$tmp\n\t" 4952 "pshufd $tmp,$tmp2,0x03\n\t" 4953 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 4954 ins_encode %{ 4955 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4956 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4957 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4958 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4959 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4960 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4961 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4962 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4963 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4964 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4965 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4966 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4967 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4968 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4969 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4970 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4971 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4972 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4973 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4974 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4975 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4976 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4977 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4978 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4979 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4980 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4981 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4982 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4983 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4984 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4985 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4986 %} 4987 ins_pipe( pipe_slow ); 4988 %} 4989 4990 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4991 predicate(UseSSE >= 1 && UseAVX == 0); 4992 match(Set dst (AddReductionVD dst src2)); 4993 effect(TEMP tmp, TEMP dst); 4994 format %{ "addsd $dst,$src2\n\t" 4995 "pshufd $tmp,$src2,0xE\n\t" 4996 "addsd $dst,$tmp\t! add reduction2D" %} 4997 ins_encode %{ 4998 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 4999 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5000 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5001 %} 5002 ins_pipe( pipe_slow ); 5003 %} 5004 5005 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5006 predicate(UseAVX > 0); 5007 match(Set dst (AddReductionVD dst src2)); 5008 effect(TEMP tmp, TEMP dst); 5009 format %{ "vaddsd $dst,$dst,$src2\n\t" 5010 "pshufd $tmp,$src2,0xE\n\t" 5011 "vaddsd $dst,$dst,$tmp\t! add reduction2D" %} 5012 ins_encode %{ 5013 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5014 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5015 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5016 %} 5017 ins_pipe( pipe_slow ); 5018 %} 5019 5020 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5021 predicate(UseAVX > 0); 5022 match(Set dst (AddReductionVD dst src2)); 5023 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5024 format %{ "vaddsd $dst,$dst,$src2\n\t" 5025 "pshufd $tmp,$src2,0xE\n\t" 5026 "vaddsd $dst,$dst,$tmp\n\t" 5027 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5028 "vaddsd $dst,$dst,$tmp2\n\t" 5029 "pshufd $tmp,$tmp2,0xE\n\t" 5030 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5031 ins_encode %{ 5032 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5033 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5034 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5035 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5036 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5037 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5038 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5039 %} 5040 ins_pipe( pipe_slow ); 5041 %} 5042 5043 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5044 predicate(UseAVX > 2); 5045 match(Set dst (AddReductionVD dst src2)); 5046 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5047 format %{ "vaddsd $dst,$dst,$src2\n\t" 5048 "pshufd $tmp,$src2,0xE\n\t" 5049 "vaddsd $dst,$dst,$tmp\n\t" 5050 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5051 "vaddsd $dst,$dst,$tmp2\n\t" 5052 "pshufd $tmp,$tmp2,0xE\n\t" 5053 "vaddsd $dst,$dst,$tmp\n\t" 5054 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5055 "vaddsd $dst,$dst,$tmp2\n\t" 5056 "pshufd $tmp,$tmp2,0xE\n\t" 5057 "vaddsd $dst,$dst,$tmp\n\t" 5058 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5059 "vaddsd $dst,$dst,$tmp2\n\t" 5060 "pshufd $tmp,$tmp2,0xE\n\t" 5061 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5062 ins_encode %{ 5063 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5064 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5065 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5066 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5067 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5068 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5069 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5070 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5071 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5072 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5073 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5074 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5075 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5076 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5077 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5078 %} 5079 ins_pipe( pipe_slow ); 5080 %} 5081 5082 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5083 predicate(UseSSE > 3 && UseAVX == 0); 5084 match(Set dst (MulReductionVI src1 src2)); 5085 effect(TEMP tmp, TEMP tmp2); 5086 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5087 "pmulld $tmp2,$src2\n\t" 5088 "movd $tmp,$src1\n\t" 5089 "pmulld $tmp2,$tmp\n\t" 5090 "movd $dst,$tmp2\t! mul reduction2I" %} 5091 ins_encode %{ 5092 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5093 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5094 __ movdl($tmp$$XMMRegister, $src1$$Register); 5095 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5096 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5097 %} 5098 ins_pipe( pipe_slow ); 5099 %} 5100 5101 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5102 predicate(UseAVX > 0); 5103 match(Set dst (MulReductionVI src1 src2)); 5104 effect(TEMP tmp, TEMP tmp2); 5105 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5106 "vpmulld $tmp,$src2,$tmp2\n\t" 5107 "movd $tmp2,$src1\n\t" 5108 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5109 "movd $dst,$tmp2\t! mul reduction2I" %} 5110 ins_encode %{ 5111 int vector_len = 0; 5112 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5113 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5114 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5115 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5116 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5117 %} 5118 ins_pipe( pipe_slow ); 5119 %} 5120 5121 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5122 predicate(UseSSE > 3 && UseAVX == 0); 5123 match(Set dst (MulReductionVI src1 src2)); 5124 effect(TEMP tmp, TEMP tmp2); 5125 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5126 "pmulld $tmp2,$src2\n\t" 5127 "pshufd $tmp,$tmp2,0x1\n\t" 5128 "pmulld $tmp2,$tmp\n\t" 5129 "movd $tmp,$src1\n\t" 5130 "pmulld $tmp2,$tmp\n\t" 5131 "movd $dst,$tmp2\t! mul reduction4I" %} 5132 ins_encode %{ 5133 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5134 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5135 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5136 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5137 __ movdl($tmp$$XMMRegister, $src1$$Register); 5138 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5139 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5140 %} 5141 ins_pipe( pipe_slow ); 5142 %} 5143 5144 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5145 predicate(UseAVX > 0); 5146 match(Set dst (MulReductionVI src1 src2)); 5147 effect(TEMP tmp, TEMP tmp2); 5148 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5149 "vpmulld $tmp,$src2,$tmp2\n\t" 5150 "pshufd $tmp2,$tmp,0x1\n\t" 5151 "vpmulld $tmp,$tmp,$tmp2\n\t" 5152 "movd $tmp2,$src1\n\t" 5153 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5154 "movd $dst,$tmp2\t! mul reduction4I" %} 5155 ins_encode %{ 5156 int vector_len = 0; 5157 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5158 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5159 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5160 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5161 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5162 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5163 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5164 %} 5165 ins_pipe( pipe_slow ); 5166 %} 5167 5168 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5169 predicate(UseAVX > 0); 5170 match(Set dst (MulReductionVI src1 src2)); 5171 effect(TEMP tmp, TEMP tmp2); 5172 format %{ "vextracti128_high $tmp,$src2\n\t" 5173 "vpmulld $tmp,$tmp,$src2\n\t" 5174 "pshufd $tmp2,$tmp,0xE\n\t" 5175 "vpmulld $tmp,$tmp,$tmp2\n\t" 5176 "pshufd $tmp2,$tmp,0x1\n\t" 5177 "vpmulld $tmp,$tmp,$tmp2\n\t" 5178 "movd $tmp2,$src1\n\t" 5179 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5180 "movd $dst,$tmp2\t! mul reduction8I" %} 5181 ins_encode %{ 5182 int vector_len = 0; 5183 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5184 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5185 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5186 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5187 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5188 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5189 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5190 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5191 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5192 %} 5193 ins_pipe( pipe_slow ); 5194 %} 5195 5196 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5197 predicate(UseAVX > 2); 5198 match(Set dst (MulReductionVI src1 src2)); 5199 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5200 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5201 "vpmulld $tmp3,$tmp3,$src2\n\t" 5202 "vextracti128_high $tmp,$tmp3\n\t" 5203 "vpmulld $tmp,$tmp,$src2\n\t" 5204 "pshufd $tmp2,$tmp,0xE\n\t" 5205 "vpmulld $tmp,$tmp,$tmp2\n\t" 5206 "pshufd $tmp2,$tmp,0x1\n\t" 5207 "vpmulld $tmp,$tmp,$tmp2\n\t" 5208 "movd $tmp2,$src1\n\t" 5209 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5210 "movd $dst,$tmp2\t! mul reduction16I" %} 5211 ins_encode %{ 5212 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5213 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5214 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5215 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5216 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5217 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5218 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5219 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5220 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5221 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5222 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5223 %} 5224 ins_pipe( pipe_slow ); 5225 %} 5226 5227 #ifdef _LP64 5228 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5229 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5230 match(Set dst (MulReductionVL src1 src2)); 5231 effect(TEMP tmp, TEMP tmp2); 5232 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5233 "vpmullq $tmp,$src2,$tmp2\n\t" 5234 "movdq $tmp2,$src1\n\t" 5235 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5236 "movdq $dst,$tmp2\t! mul reduction2L" %} 5237 ins_encode %{ 5238 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5239 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5240 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5241 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5242 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5243 %} 5244 ins_pipe( pipe_slow ); 5245 %} 5246 5247 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5248 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5249 match(Set dst (MulReductionVL src1 src2)); 5250 effect(TEMP tmp, TEMP tmp2); 5251 format %{ "vextracti128_high $tmp,$src2\n\t" 5252 "vpmullq $tmp2,$tmp,$src2\n\t" 5253 "pshufd $tmp,$tmp2,0xE\n\t" 5254 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5255 "movdq $tmp,$src1\n\t" 5256 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5257 "movdq $dst,$tmp2\t! mul reduction4L" %} 5258 ins_encode %{ 5259 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5260 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5261 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5262 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5263 __ movdq($tmp$$XMMRegister, $src1$$Register); 5264 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5265 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5266 %} 5267 ins_pipe( pipe_slow ); 5268 %} 5269 5270 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5271 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5272 match(Set dst (MulReductionVL src1 src2)); 5273 effect(TEMP tmp, TEMP tmp2); 5274 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5275 "vpmullq $tmp2,$tmp2,$src2\n\t" 5276 "vextracti128_high $tmp,$tmp2\n\t" 5277 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5278 "pshufd $tmp,$tmp2,0xE\n\t" 5279 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5280 "movdq $tmp,$src1\n\t" 5281 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5282 "movdq $dst,$tmp2\t! mul reduction8L" %} 5283 ins_encode %{ 5284 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5285 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5286 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5287 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5288 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5289 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5290 __ movdq($tmp$$XMMRegister, $src1$$Register); 5291 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5292 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5293 %} 5294 ins_pipe( pipe_slow ); 5295 %} 5296 #endif 5297 5298 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5299 predicate(UseSSE >= 1 && UseAVX == 0); 5300 match(Set dst (MulReductionVF dst src2)); 5301 effect(TEMP dst, TEMP tmp); 5302 format %{ "mulss $dst,$src2\n\t" 5303 "pshufd $tmp,$src2,0x01\n\t" 5304 "mulss $dst,$tmp\t! mul reduction2F" %} 5305 ins_encode %{ 5306 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5307 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5308 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5309 %} 5310 ins_pipe( pipe_slow ); 5311 %} 5312 5313 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5314 predicate(UseAVX > 0); 5315 match(Set dst (MulReductionVF dst src2)); 5316 effect(TEMP tmp, TEMP dst); 5317 format %{ "vmulss $dst,$dst,$src2\n\t" 5318 "pshufd $tmp,$src2,0x01\n\t" 5319 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5320 ins_encode %{ 5321 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5322 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5323 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5324 %} 5325 ins_pipe( pipe_slow ); 5326 %} 5327 5328 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5329 predicate(UseSSE >= 1 && UseAVX == 0); 5330 match(Set dst (MulReductionVF dst src2)); 5331 effect(TEMP dst, TEMP tmp); 5332 format %{ "mulss $dst,$src2\n\t" 5333 "pshufd $tmp,$src2,0x01\n\t" 5334 "mulss $dst,$tmp\n\t" 5335 "pshufd $tmp,$src2,0x02\n\t" 5336 "mulss $dst,$tmp\n\t" 5337 "pshufd $tmp,$src2,0x03\n\t" 5338 "mulss $dst,$tmp\t! mul reduction4F" %} 5339 ins_encode %{ 5340 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5341 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5342 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5343 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5344 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5345 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5346 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5347 %} 5348 ins_pipe( pipe_slow ); 5349 %} 5350 5351 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5352 predicate(UseAVX > 0); 5353 match(Set dst (MulReductionVF dst src2)); 5354 effect(TEMP tmp, TEMP dst); 5355 format %{ "vmulss $dst,$dst,$src2\n\t" 5356 "pshufd $tmp,$src2,0x01\n\t" 5357 "vmulss $dst,$dst,$tmp\n\t" 5358 "pshufd $tmp,$src2,0x02\n\t" 5359 "vmulss $dst,$dst,$tmp\n\t" 5360 "pshufd $tmp,$src2,0x03\n\t" 5361 "vmulss $dst,$dst,$tmp\t! mul reduction4F" %} 5362 ins_encode %{ 5363 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5364 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5365 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5366 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5367 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5368 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5369 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5370 %} 5371 ins_pipe( pipe_slow ); 5372 %} 5373 5374 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5375 predicate(UseAVX > 0); 5376 match(Set dst (MulReductionVF dst src2)); 5377 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5378 format %{ "vmulss $dst,$dst,$src2\n\t" 5379 "pshufd $tmp,$src2,0x01\n\t" 5380 "vmulss $dst,$dst,$tmp\n\t" 5381 "pshufd $tmp,$src2,0x02\n\t" 5382 "vmulss $dst,$dst,$tmp\n\t" 5383 "pshufd $tmp,$src2,0x03\n\t" 5384 "vmulss $dst,$dst,$tmp\n\t" 5385 "vextractf128_high $tmp2,$src2\n\t" 5386 "vmulss $dst,$dst,$tmp2\n\t" 5387 "pshufd $tmp,$tmp2,0x01\n\t" 5388 "vmulss $dst,$dst,$tmp\n\t" 5389 "pshufd $tmp,$tmp2,0x02\n\t" 5390 "vmulss $dst,$dst,$tmp\n\t" 5391 "pshufd $tmp,$tmp2,0x03\n\t" 5392 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5393 ins_encode %{ 5394 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5395 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5396 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5397 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5398 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5399 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5400 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5401 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5402 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5403 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5404 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5405 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5406 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5407 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5408 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5409 %} 5410 ins_pipe( pipe_slow ); 5411 %} 5412 5413 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5414 predicate(UseAVX > 2); 5415 match(Set dst (MulReductionVF dst src2)); 5416 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5417 format %{ "vmulss $dst,$dst,$src2\n\t" 5418 "pshufd $tmp,$src2,0x01\n\t" 5419 "vmulss $dst,$dst,$tmp\n\t" 5420 "pshufd $tmp,$src2,0x02\n\t" 5421 "vmulss $dst,$dst,$tmp\n\t" 5422 "pshufd $tmp,$src2,0x03\n\t" 5423 "vmulss $dst,$dst,$tmp\n\t" 5424 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5425 "vmulss $dst,$dst,$tmp2\n\t" 5426 "pshufd $tmp,$tmp2,0x01\n\t" 5427 "vmulss $dst,$dst,$tmp\n\t" 5428 "pshufd $tmp,$tmp2,0x02\n\t" 5429 "vmulss $dst,$dst,$tmp\n\t" 5430 "pshufd $tmp,$tmp2,0x03\n\t" 5431 "vmulss $dst,$dst,$tmp\n\t" 5432 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5433 "vmulss $dst,$dst,$tmp2\n\t" 5434 "pshufd $tmp,$tmp2,0x01\n\t" 5435 "vmulss $dst,$dst,$tmp\n\t" 5436 "pshufd $tmp,$tmp2,0x02\n\t" 5437 "vmulss $dst,$dst,$tmp\n\t" 5438 "pshufd $tmp,$tmp2,0x03\n\t" 5439 "vmulss $dst,$dst,$tmp\n\t" 5440 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5441 "vmulss $dst,$dst,$tmp2\n\t" 5442 "pshufd $tmp,$tmp2,0x01\n\t" 5443 "vmulss $dst,$dst,$tmp\n\t" 5444 "pshufd $tmp,$tmp2,0x02\n\t" 5445 "vmulss $dst,$dst,$tmp\n\t" 5446 "pshufd $tmp,$tmp2,0x03\n\t" 5447 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5448 ins_encode %{ 5449 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5450 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5451 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5452 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5453 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5454 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5455 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5456 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5457 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5458 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5459 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5460 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5461 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5462 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5463 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5464 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5465 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5466 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5467 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5468 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5469 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5470 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5471 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5472 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5473 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5474 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5475 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5476 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5477 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5478 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5479 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5480 %} 5481 ins_pipe( pipe_slow ); 5482 %} 5483 5484 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5485 predicate(UseSSE >= 1 && UseAVX == 0); 5486 match(Set dst (MulReductionVD dst src2)); 5487 effect(TEMP dst, TEMP tmp); 5488 format %{ "mulsd $dst,$src2\n\t" 5489 "pshufd $tmp,$src2,0xE\n\t" 5490 "mulsd $dst,$tmp\t! mul reduction2D" %} 5491 ins_encode %{ 5492 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5493 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5494 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5495 %} 5496 ins_pipe( pipe_slow ); 5497 %} 5498 5499 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5500 predicate(UseAVX > 0); 5501 match(Set dst (MulReductionVD dst src2)); 5502 effect(TEMP tmp, TEMP dst); 5503 format %{ "vmulsd $dst,$dst,$src2\n\t" 5504 "pshufd $tmp,$src2,0xE\n\t" 5505 "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %} 5506 ins_encode %{ 5507 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5508 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5509 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5510 %} 5511 ins_pipe( pipe_slow ); 5512 %} 5513 5514 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5515 predicate(UseAVX > 0); 5516 match(Set dst (MulReductionVD dst src2)); 5517 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5518 format %{ "vmulsd $dst,$dst,$src2\n\t" 5519 "pshufd $tmp,$src2,0xE\n\t" 5520 "vmulsd $dst,$dst,$tmp\n\t" 5521 "vextractf128_high $tmp2,$src2\n\t" 5522 "vmulsd $dst,$dst,$tmp2\n\t" 5523 "pshufd $tmp,$tmp2,0xE\n\t" 5524 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5525 ins_encode %{ 5526 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5527 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5528 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5529 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5530 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5531 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5532 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5533 %} 5534 ins_pipe( pipe_slow ); 5535 %} 5536 5537 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5538 predicate(UseAVX > 2); 5539 match(Set dst (MulReductionVD dst src2)); 5540 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5541 format %{ "vmulsd $dst,$dst,$src2\n\t" 5542 "pshufd $tmp,$src2,0xE\n\t" 5543 "vmulsd $dst,$dst,$tmp\n\t" 5544 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5545 "vmulsd $dst,$dst,$tmp2\n\t" 5546 "pshufd $tmp,$src2,0xE\n\t" 5547 "vmulsd $dst,$dst,$tmp\n\t" 5548 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5549 "vmulsd $dst,$dst,$tmp2\n\t" 5550 "pshufd $tmp,$tmp2,0xE\n\t" 5551 "vmulsd $dst,$dst,$tmp\n\t" 5552 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5553 "vmulsd $dst,$dst,$tmp2\n\t" 5554 "pshufd $tmp,$tmp2,0xE\n\t" 5555 "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} 5556 ins_encode %{ 5557 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5558 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5559 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5560 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5561 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5562 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5563 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5564 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5565 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5566 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5567 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5568 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5569 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5570 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5571 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5572 %} 5573 ins_pipe( pipe_slow ); 5574 %} 5575 5576 // ====================VECTOR ARITHMETIC======================================= 5577 5578 // --------------------------------- ADD -------------------------------------- 5579 5580 // Bytes vector add 5581 instruct vadd4B(vecS dst, vecS src) %{ 5582 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5583 match(Set dst (AddVB dst src)); 5584 format %{ "paddb $dst,$src\t! add packed4B" %} 5585 ins_encode %{ 5586 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5587 %} 5588 ins_pipe( pipe_slow ); 5589 %} 5590 5591 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5592 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5593 match(Set dst (AddVB src1 src2)); 5594 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5595 ins_encode %{ 5596 int vector_len = 0; 5597 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5598 %} 5599 ins_pipe( pipe_slow ); 5600 %} 5601 5602 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5603 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5604 match(Set dst (AddVB src1 src2)); 5605 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5606 ins_encode %{ 5607 int vector_len = 0; 5608 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5609 %} 5610 ins_pipe( pipe_slow ); 5611 %} 5612 5613 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5614 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5615 match(Set dst (AddVB dst src2)); 5616 effect(TEMP src1); 5617 format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} 5618 ins_encode %{ 5619 int vector_len = 0; 5620 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5621 %} 5622 ins_pipe( pipe_slow ); 5623 %} 5624 5625 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ 5626 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5627 match(Set dst (AddVB src (LoadVector mem))); 5628 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5629 ins_encode %{ 5630 int vector_len = 0; 5631 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5632 %} 5633 ins_pipe( pipe_slow ); 5634 %} 5635 5636 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ 5637 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5638 match(Set dst (AddVB src (LoadVector mem))); 5639 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5640 ins_encode %{ 5641 int vector_len = 0; 5642 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5643 %} 5644 ins_pipe( pipe_slow ); 5645 %} 5646 5647 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5648 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5649 match(Set dst (AddVB dst (LoadVector mem))); 5650 effect(TEMP src); 5651 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5652 ins_encode %{ 5653 int vector_len = 0; 5654 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5655 %} 5656 ins_pipe( pipe_slow ); 5657 %} 5658 5659 instruct vadd8B(vecD dst, vecD src) %{ 5660 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 5661 match(Set dst (AddVB dst src)); 5662 format %{ "paddb $dst,$src\t! add packed8B" %} 5663 ins_encode %{ 5664 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5665 %} 5666 ins_pipe( pipe_slow ); 5667 %} 5668 5669 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5670 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5671 match(Set dst (AddVB src1 src2)); 5672 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5673 ins_encode %{ 5674 int vector_len = 0; 5675 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5676 %} 5677 ins_pipe( pipe_slow ); 5678 %} 5679 5680 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5681 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5682 match(Set dst (AddVB src1 src2)); 5683 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5684 ins_encode %{ 5685 int vector_len = 0; 5686 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5687 %} 5688 ins_pipe( pipe_slow ); 5689 %} 5690 5691 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5692 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5693 match(Set dst (AddVB dst src2)); 5694 effect(TEMP src1); 5695 format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} 5696 ins_encode %{ 5697 int vector_len = 0; 5698 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5699 %} 5700 ins_pipe( pipe_slow ); 5701 %} 5702 5703 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ 5704 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5705 match(Set dst (AddVB src (LoadVector mem))); 5706 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5707 ins_encode %{ 5708 int vector_len = 0; 5709 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5710 %} 5711 ins_pipe( pipe_slow ); 5712 %} 5713 5714 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ 5715 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5716 match(Set dst (AddVB src (LoadVector mem))); 5717 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5718 ins_encode %{ 5719 int vector_len = 0; 5720 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5721 %} 5722 ins_pipe( pipe_slow ); 5723 %} 5724 5725 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 5726 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5727 match(Set dst (AddVB dst (LoadVector mem))); 5728 effect(TEMP src); 5729 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5730 ins_encode %{ 5731 int vector_len = 0; 5732 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5733 %} 5734 ins_pipe( pipe_slow ); 5735 %} 5736 5737 instruct vadd16B(vecX dst, vecX src) %{ 5738 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 5739 match(Set dst (AddVB dst src)); 5740 format %{ "paddb $dst,$src\t! add packed16B" %} 5741 ins_encode %{ 5742 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5743 %} 5744 ins_pipe( pipe_slow ); 5745 %} 5746 5747 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 5748 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5749 match(Set dst (AddVB src1 src2)); 5750 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5751 ins_encode %{ 5752 int vector_len = 0; 5753 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5754 %} 5755 ins_pipe( pipe_slow ); 5756 %} 5757 5758 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 5759 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5760 match(Set dst (AddVB src1 src2)); 5761 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5762 ins_encode %{ 5763 int vector_len = 0; 5764 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5765 %} 5766 ins_pipe( pipe_slow ); 5767 %} 5768 5769 instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 5770 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 5771 match(Set dst (AddVB dst src2)); 5772 effect(TEMP src1); 5773 format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %} 5774 ins_encode %{ 5775 int vector_len = 0; 5776 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5777 %} 5778 ins_pipe( pipe_slow ); 5779 %} 5780 5781 instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ 5782 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5783 match(Set dst (AddVB src (LoadVector mem))); 5784 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5785 ins_encode %{ 5786 int vector_len = 0; 5787 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ 5793 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5794 match(Set dst (AddVB src (LoadVector mem))); 5795 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5796 ins_encode %{ 5797 int vector_len = 0; 5798 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5799 %} 5800 ins_pipe( pipe_slow ); 5801 %} 5802 5803 instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 5804 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 5805 match(Set dst (AddVB dst (LoadVector mem))); 5806 effect(TEMP src); 5807 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5808 ins_encode %{ 5809 int vector_len = 0; 5810 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5811 %} 5812 ins_pipe( pipe_slow ); 5813 %} 5814 5815 instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 5816 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5817 match(Set dst (AddVB src1 src2)); 5818 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5819 ins_encode %{ 5820 int vector_len = 1; 5821 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5822 %} 5823 ins_pipe( pipe_slow ); 5824 %} 5825 5826 instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 5827 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5828 match(Set dst (AddVB src1 src2)); 5829 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5830 ins_encode %{ 5831 int vector_len = 1; 5832 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5833 %} 5834 ins_pipe( pipe_slow ); 5835 %} 5836 5837 instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 5838 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 5839 match(Set dst (AddVB dst src2)); 5840 effect(TEMP src1); 5841 format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %} 5842 ins_encode %{ 5843 int vector_len = 1; 5844 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5845 %} 5846 ins_pipe( pipe_slow ); 5847 %} 5848 5849 instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{ 5850 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5851 match(Set dst (AddVB src (LoadVector mem))); 5852 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5853 ins_encode %{ 5854 int vector_len = 1; 5855 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5856 %} 5857 ins_pipe( pipe_slow ); 5858 %} 5859 5860 instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{ 5861 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5862 match(Set dst (AddVB src (LoadVector mem))); 5863 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5864 ins_encode %{ 5865 int vector_len = 1; 5866 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5867 %} 5868 ins_pipe( pipe_slow ); 5869 %} 5870 5871 instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 5872 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 5873 match(Set dst (AddVB dst (LoadVector mem))); 5874 effect(TEMP src); 5875 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5876 ins_encode %{ 5877 int vector_len = 1; 5878 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5879 %} 5880 ins_pipe( pipe_slow ); 5881 %} 5882 5883 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5884 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 5885 match(Set dst (AddVB src1 src2)); 5886 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 5887 ins_encode %{ 5888 int vector_len = 2; 5889 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5890 %} 5891 ins_pipe( pipe_slow ); 5892 %} 5893 5894 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5895 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 5896 match(Set dst (AddVB src (LoadVector mem))); 5897 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5898 ins_encode %{ 5899 int vector_len = 2; 5900 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5901 %} 5902 ins_pipe( pipe_slow ); 5903 %} 5904 5905 // Shorts/Chars vector add 5906 instruct vadd2S(vecS dst, vecS src) %{ 5907 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 5908 match(Set dst (AddVS dst src)); 5909 format %{ "paddw $dst,$src\t! add packed2S" %} 5910 ins_encode %{ 5911 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5912 %} 5913 ins_pipe( pipe_slow ); 5914 %} 5915 5916 instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5917 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5918 match(Set dst (AddVS src1 src2)); 5919 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5920 ins_encode %{ 5921 int vector_len = 0; 5922 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5923 %} 5924 ins_pipe( pipe_slow ); 5925 %} 5926 5927 instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5928 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5929 match(Set dst (AddVS src1 src2)); 5930 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5931 ins_encode %{ 5932 int vector_len = 0; 5933 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5934 %} 5935 ins_pipe( pipe_slow ); 5936 %} 5937 5938 instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5939 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 5940 match(Set dst (AddVS dst src2)); 5941 effect(TEMP src1); 5942 format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %} 5943 ins_encode %{ 5944 int vector_len = 0; 5945 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5946 %} 5947 ins_pipe( pipe_slow ); 5948 %} 5949 5950 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ 5951 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5952 match(Set dst (AddVS src (LoadVector mem))); 5953 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5954 ins_encode %{ 5955 int vector_len = 0; 5956 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5957 %} 5958 ins_pipe( pipe_slow ); 5959 %} 5960 5961 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ 5962 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5963 match(Set dst (AddVS src (LoadVector mem))); 5964 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5965 ins_encode %{ 5966 int vector_len = 0; 5967 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5968 %} 5969 ins_pipe( pipe_slow ); 5970 %} 5971 5972 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5973 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 5974 match(Set dst (AddVS dst (LoadVector mem))); 5975 effect(TEMP src); 5976 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5977 ins_encode %{ 5978 int vector_len = 0; 5979 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5980 %} 5981 ins_pipe( pipe_slow ); 5982 %} 5983 5984 instruct vadd4S(vecD dst, vecD src) %{ 5985 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5986 match(Set dst (AddVS dst src)); 5987 format %{ "paddw $dst,$src\t! add packed4S" %} 5988 ins_encode %{ 5989 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5990 %} 5991 ins_pipe( pipe_slow ); 5992 %} 5993 5994 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5995 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5996 match(Set dst (AddVS src1 src2)); 5997 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5998 ins_encode %{ 5999 int vector_len = 0; 6000 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6001 %} 6002 ins_pipe( pipe_slow ); 6003 %} 6004 6005 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 6006 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6007 match(Set dst (AddVS src1 src2)); 6008 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 6009 ins_encode %{ 6010 int vector_len = 0; 6011 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6012 %} 6013 ins_pipe( pipe_slow ); 6014 %} 6015 6016 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 6017 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6018 match(Set dst (AddVS dst src2)); 6019 effect(TEMP src1); 6020 format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} 6021 ins_encode %{ 6022 int vector_len = 0; 6023 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6024 %} 6025 ins_pipe( pipe_slow ); 6026 %} 6027 6028 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ 6029 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6030 match(Set dst (AddVS src (LoadVector mem))); 6031 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 6032 ins_encode %{ 6033 int vector_len = 0; 6034 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6035 %} 6036 ins_pipe( pipe_slow ); 6037 %} 6038 6039 instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{ 6040 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6041 match(Set dst (AddVS src (LoadVector mem))); 6042 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 6043 ins_encode %{ 6044 int vector_len = 0; 6045 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6046 %} 6047 ins_pipe( pipe_slow ); 6048 %} 6049 6050 instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 6051 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6052 match(Set dst (AddVS dst (LoadVector mem))); 6053 effect(TEMP src); 6054 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 6055 ins_encode %{ 6056 int vector_len = 0; 6057 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6058 %} 6059 ins_pipe( pipe_slow ); 6060 %} 6061 6062 instruct vadd8S(vecX dst, vecX src) %{ 6063 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6064 match(Set dst (AddVS dst src)); 6065 format %{ "paddw $dst,$src\t! add packed8S" %} 6066 ins_encode %{ 6067 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 6068 %} 6069 ins_pipe( pipe_slow ); 6070 %} 6071 6072 instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 6073 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6074 match(Set dst (AddVS src1 src2)); 6075 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 6076 ins_encode %{ 6077 int vector_len = 0; 6078 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6079 %} 6080 ins_pipe( pipe_slow ); 6081 %} 6082 6083 instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 6084 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6085 match(Set dst (AddVS src1 src2)); 6086 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 6087 ins_encode %{ 6088 int vector_len = 0; 6089 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6090 %} 6091 ins_pipe( pipe_slow ); 6092 %} 6093 6094 instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 6095 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6096 match(Set dst (AddVS dst src2)); 6097 effect(TEMP src1); 6098 format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %} 6099 ins_encode %{ 6100 int vector_len = 0; 6101 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6102 %} 6103 ins_pipe( pipe_slow ); 6104 %} 6105 6106 instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{ 6107 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6108 match(Set dst (AddVS src (LoadVector mem))); 6109 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 6110 ins_encode %{ 6111 int vector_len = 0; 6112 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6113 %} 6114 ins_pipe( pipe_slow ); 6115 %} 6116 6117 instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{ 6118 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6119 match(Set dst (AddVS src (LoadVector mem))); 6120 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 6121 ins_encode %{ 6122 int vector_len = 0; 6123 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6124 %} 6125 ins_pipe( pipe_slow ); 6126 %} 6127 6128 instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 6129 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6130 match(Set dst (AddVS dst (LoadVector mem))); 6131 effect(TEMP src); 6132 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 6133 ins_encode %{ 6134 int vector_len = 0; 6135 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6136 %} 6137 ins_pipe( pipe_slow ); 6138 %} 6139 6140 instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 6141 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 6142 match(Set dst (AddVS src1 src2)); 6143 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 6144 ins_encode %{ 6145 int vector_len = 1; 6146 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6147 %} 6148 ins_pipe( pipe_slow ); 6149 %} 6150 6151 instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 6152 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6153 match(Set dst (AddVS src1 src2)); 6154 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 6155 ins_encode %{ 6156 int vector_len = 1; 6157 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6158 %} 6159 ins_pipe( pipe_slow ); 6160 %} 6161 6162 instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 6163 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6164 match(Set dst (AddVS dst src2)); 6165 effect(TEMP src1); 6166 format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %} 6167 ins_encode %{ 6168 int vector_len = 1; 6169 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6170 %} 6171 ins_pipe( pipe_slow ); 6172 %} 6173 6174 instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{ 6175 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 6176 match(Set dst (AddVS src (LoadVector mem))); 6177 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6178 ins_encode %{ 6179 int vector_len = 1; 6180 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6181 %} 6182 ins_pipe( pipe_slow ); 6183 %} 6184 6185 instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{ 6186 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6187 match(Set dst (AddVS src (LoadVector mem))); 6188 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6189 ins_encode %{ 6190 int vector_len = 1; 6191 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6192 %} 6193 ins_pipe( pipe_slow ); 6194 %} 6195 6196 instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 6197 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6198 match(Set dst (AddVS dst (LoadVector mem))); 6199 effect(TEMP src); 6200 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6201 ins_encode %{ 6202 int vector_len = 1; 6203 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6204 %} 6205 ins_pipe( pipe_slow ); 6206 %} 6207 6208 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6209 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6210 match(Set dst (AddVS src1 src2)); 6211 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} 6212 ins_encode %{ 6213 int vector_len = 2; 6214 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6215 %} 6216 ins_pipe( pipe_slow ); 6217 %} 6218 6219 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 6220 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6221 match(Set dst (AddVS src (LoadVector mem))); 6222 format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} 6223 ins_encode %{ 6224 int vector_len = 2; 6225 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6226 %} 6227 ins_pipe( pipe_slow ); 6228 %} 6229 6230 // Integers vector add 6231 instruct vadd2I(vecD dst, vecD src) %{ 6232 predicate(n->as_Vector()->length() == 2); 6233 match(Set dst (AddVI dst src)); 6234 format %{ "paddd $dst,$src\t! add packed2I" %} 6235 ins_encode %{ 6236 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6237 %} 6238 ins_pipe( pipe_slow ); 6239 %} 6240 6241 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 6242 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6243 match(Set dst (AddVI src1 src2)); 6244 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 6245 ins_encode %{ 6246 int vector_len = 0; 6247 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6248 %} 6249 ins_pipe( pipe_slow ); 6250 %} 6251 6252 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 6253 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6254 match(Set dst (AddVI src (LoadVector mem))); 6255 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 6256 ins_encode %{ 6257 int vector_len = 0; 6258 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6259 %} 6260 ins_pipe( pipe_slow ); 6261 %} 6262 6263 instruct vadd4I(vecX dst, vecX src) %{ 6264 predicate(n->as_Vector()->length() == 4); 6265 match(Set dst (AddVI dst src)); 6266 format %{ "paddd $dst,$src\t! add packed4I" %} 6267 ins_encode %{ 6268 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6269 %} 6270 ins_pipe( pipe_slow ); 6271 %} 6272 6273 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 6274 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6275 match(Set dst (AddVI src1 src2)); 6276 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 6277 ins_encode %{ 6278 int vector_len = 0; 6279 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6280 %} 6281 ins_pipe( pipe_slow ); 6282 %} 6283 6284 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 6285 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6286 match(Set dst (AddVI src (LoadVector mem))); 6287 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 6288 ins_encode %{ 6289 int vector_len = 0; 6290 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6291 %} 6292 ins_pipe( pipe_slow ); 6293 %} 6294 6295 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 6296 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6297 match(Set dst (AddVI src1 src2)); 6298 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 6299 ins_encode %{ 6300 int vector_len = 1; 6301 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6302 %} 6303 ins_pipe( pipe_slow ); 6304 %} 6305 6306 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 6307 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6308 match(Set dst (AddVI src (LoadVector mem))); 6309 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 6310 ins_encode %{ 6311 int vector_len = 1; 6312 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6313 %} 6314 ins_pipe( pipe_slow ); 6315 %} 6316 6317 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6318 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6319 match(Set dst (AddVI src1 src2)); 6320 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} 6321 ins_encode %{ 6322 int vector_len = 2; 6323 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6324 %} 6325 ins_pipe( pipe_slow ); 6326 %} 6327 6328 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 6329 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6330 match(Set dst (AddVI src (LoadVector mem))); 6331 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 6332 ins_encode %{ 6333 int vector_len = 2; 6334 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6335 %} 6336 ins_pipe( pipe_slow ); 6337 %} 6338 6339 // Longs vector add 6340 instruct vadd2L(vecX dst, vecX src) %{ 6341 predicate(n->as_Vector()->length() == 2); 6342 match(Set dst (AddVL dst src)); 6343 format %{ "paddq $dst,$src\t! add packed2L" %} 6344 ins_encode %{ 6345 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 6346 %} 6347 ins_pipe( pipe_slow ); 6348 %} 6349 6350 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 6351 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6352 match(Set dst (AddVL src1 src2)); 6353 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 6354 ins_encode %{ 6355 int vector_len = 0; 6356 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6357 %} 6358 ins_pipe( pipe_slow ); 6359 %} 6360 6361 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 6362 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6363 match(Set dst (AddVL src (LoadVector mem))); 6364 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 6365 ins_encode %{ 6366 int vector_len = 0; 6367 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6368 %} 6369 ins_pipe( pipe_slow ); 6370 %} 6371 6372 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 6373 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6374 match(Set dst (AddVL src1 src2)); 6375 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 6376 ins_encode %{ 6377 int vector_len = 1; 6378 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6379 %} 6380 ins_pipe( pipe_slow ); 6381 %} 6382 6383 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 6384 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6385 match(Set dst (AddVL src (LoadVector mem))); 6386 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 6387 ins_encode %{ 6388 int vector_len = 1; 6389 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6390 %} 6391 ins_pipe( pipe_slow ); 6392 %} 6393 6394 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6395 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6396 match(Set dst (AddVL src1 src2)); 6397 format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} 6398 ins_encode %{ 6399 int vector_len = 2; 6400 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6401 %} 6402 ins_pipe( pipe_slow ); 6403 %} 6404 6405 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ 6406 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6407 match(Set dst (AddVL src (LoadVector mem))); 6408 format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} 6409 ins_encode %{ 6410 int vector_len = 2; 6411 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 // Floats vector add 6417 instruct vadd2F(vecD dst, vecD src) %{ 6418 predicate(n->as_Vector()->length() == 2); 6419 match(Set dst (AddVF dst src)); 6420 format %{ "addps $dst,$src\t! add packed2F" %} 6421 ins_encode %{ 6422 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6423 %} 6424 ins_pipe( pipe_slow ); 6425 %} 6426 6427 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 6428 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6429 match(Set dst (AddVF src1 src2)); 6430 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 6431 ins_encode %{ 6432 int vector_len = 0; 6433 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6434 %} 6435 ins_pipe( pipe_slow ); 6436 %} 6437 6438 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 6439 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6440 match(Set dst (AddVF src (LoadVector mem))); 6441 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 6442 ins_encode %{ 6443 int vector_len = 0; 6444 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6445 %} 6446 ins_pipe( pipe_slow ); 6447 %} 6448 6449 instruct vadd4F(vecX dst, vecX src) %{ 6450 predicate(n->as_Vector()->length() == 4); 6451 match(Set dst (AddVF dst src)); 6452 format %{ "addps $dst,$src\t! add packed4F" %} 6453 ins_encode %{ 6454 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6455 %} 6456 ins_pipe( pipe_slow ); 6457 %} 6458 6459 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 6460 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6461 match(Set dst (AddVF src1 src2)); 6462 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 6463 ins_encode %{ 6464 int vector_len = 0; 6465 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6466 %} 6467 ins_pipe( pipe_slow ); 6468 %} 6469 6470 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 6471 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6472 match(Set dst (AddVF src (LoadVector mem))); 6473 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 6474 ins_encode %{ 6475 int vector_len = 0; 6476 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6477 %} 6478 ins_pipe( pipe_slow ); 6479 %} 6480 6481 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 6482 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6483 match(Set dst (AddVF src1 src2)); 6484 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 6485 ins_encode %{ 6486 int vector_len = 1; 6487 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6488 %} 6489 ins_pipe( pipe_slow ); 6490 %} 6491 6492 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 6493 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6494 match(Set dst (AddVF src (LoadVector mem))); 6495 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 6496 ins_encode %{ 6497 int vector_len = 1; 6498 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6499 %} 6500 ins_pipe( pipe_slow ); 6501 %} 6502 6503 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6504 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6505 match(Set dst (AddVF src1 src2)); 6506 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} 6507 ins_encode %{ 6508 int vector_len = 2; 6509 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6510 %} 6511 ins_pipe( pipe_slow ); 6512 %} 6513 6514 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 6515 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6516 match(Set dst (AddVF src (LoadVector mem))); 6517 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 6518 ins_encode %{ 6519 int vector_len = 2; 6520 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6521 %} 6522 ins_pipe( pipe_slow ); 6523 %} 6524 6525 // Doubles vector add 6526 instruct vadd2D(vecX dst, vecX src) %{ 6527 predicate(n->as_Vector()->length() == 2); 6528 match(Set dst (AddVD dst src)); 6529 format %{ "addpd $dst,$src\t! add packed2D" %} 6530 ins_encode %{ 6531 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 6532 %} 6533 ins_pipe( pipe_slow ); 6534 %} 6535 6536 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 6537 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6538 match(Set dst (AddVD src1 src2)); 6539 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 6540 ins_encode %{ 6541 int vector_len = 0; 6542 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6543 %} 6544 ins_pipe( pipe_slow ); 6545 %} 6546 6547 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 6548 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6549 match(Set dst (AddVD src (LoadVector mem))); 6550 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 6551 ins_encode %{ 6552 int vector_len = 0; 6553 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6554 %} 6555 ins_pipe( pipe_slow ); 6556 %} 6557 6558 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 6559 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6560 match(Set dst (AddVD src1 src2)); 6561 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 6562 ins_encode %{ 6563 int vector_len = 1; 6564 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6565 %} 6566 ins_pipe( pipe_slow ); 6567 %} 6568 6569 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 6570 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6571 match(Set dst (AddVD src (LoadVector mem))); 6572 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 6573 ins_encode %{ 6574 int vector_len = 1; 6575 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6576 %} 6577 ins_pipe( pipe_slow ); 6578 %} 6579 6580 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6581 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6582 match(Set dst (AddVD src1 src2)); 6583 format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} 6584 ins_encode %{ 6585 int vector_len = 2; 6586 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6587 %} 6588 ins_pipe( pipe_slow ); 6589 %} 6590 6591 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 6592 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6593 match(Set dst (AddVD src (LoadVector mem))); 6594 format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} 6595 ins_encode %{ 6596 int vector_len = 2; 6597 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6598 %} 6599 ins_pipe( pipe_slow ); 6600 %} 6601 6602 // --------------------------------- SUB -------------------------------------- 6603 6604 // Bytes vector sub 6605 instruct vsub4B(vecS dst, vecS src) %{ 6606 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6607 match(Set dst (SubVB dst src)); 6608 format %{ "psubb $dst,$src\t! sub packed4B" %} 6609 ins_encode %{ 6610 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6611 %} 6612 ins_pipe( pipe_slow ); 6613 %} 6614 6615 instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 6616 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6617 match(Set dst (SubVB src1 src2)); 6618 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6619 ins_encode %{ 6620 int vector_len = 0; 6621 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6622 %} 6623 ins_pipe( pipe_slow ); 6624 %} 6625 6626 instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 6627 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6628 match(Set dst (SubVB src1 src2)); 6629 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6630 ins_encode %{ 6631 int vector_len = 0; 6632 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6633 %} 6634 ins_pipe( pipe_slow ); 6635 %} 6636 6637 instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{ 6638 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6639 match(Set dst (SubVB dst src2)); 6640 effect(TEMP src1); 6641 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6642 ins_encode %{ 6643 int vector_len = 0; 6644 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6645 %} 6646 ins_pipe( pipe_slow ); 6647 %} 6648 6649 instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{ 6650 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6651 match(Set dst (SubVB src (LoadVector mem))); 6652 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6653 ins_encode %{ 6654 int vector_len = 0; 6655 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6656 %} 6657 ins_pipe( pipe_slow ); 6658 %} 6659 6660 instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{ 6661 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6662 match(Set dst (SubVB src (LoadVector mem))); 6663 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6664 ins_encode %{ 6665 int vector_len = 0; 6666 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6667 %} 6668 ins_pipe( pipe_slow ); 6669 %} 6670 6671 instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 6672 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6673 match(Set dst (SubVB dst (LoadVector mem))); 6674 effect(TEMP src); 6675 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6676 ins_encode %{ 6677 int vector_len = 0; 6678 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6679 %} 6680 ins_pipe( pipe_slow ); 6681 %} 6682 6683 instruct vsub8B(vecD dst, vecD src) %{ 6684 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6685 match(Set dst (SubVB dst src)); 6686 format %{ "psubb $dst,$src\t! sub packed8B" %} 6687 ins_encode %{ 6688 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6689 %} 6690 ins_pipe( pipe_slow ); 6691 %} 6692 6693 instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 6694 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6695 match(Set dst (SubVB src1 src2)); 6696 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6697 ins_encode %{ 6698 int vector_len = 0; 6699 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6700 %} 6701 ins_pipe( pipe_slow ); 6702 %} 6703 6704 instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 6705 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6706 match(Set dst (SubVB src1 src2)); 6707 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6708 ins_encode %{ 6709 int vector_len = 0; 6710 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6711 %} 6712 ins_pipe( pipe_slow ); 6713 %} 6714 6715 instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 6716 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6717 match(Set dst (SubVB dst src2)); 6718 effect(TEMP src1); 6719 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6720 ins_encode %{ 6721 int vector_len = 0; 6722 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6723 %} 6724 ins_pipe( pipe_slow ); 6725 %} 6726 6727 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ 6728 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6729 match(Set dst (SubVB src (LoadVector mem))); 6730 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6731 ins_encode %{ 6732 int vector_len = 0; 6733 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6734 %} 6735 ins_pipe( pipe_slow ); 6736 %} 6737 6738 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ 6739 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6740 match(Set dst (SubVB src (LoadVector mem))); 6741 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6742 ins_encode %{ 6743 int vector_len = 0; 6744 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6745 %} 6746 ins_pipe( pipe_slow ); 6747 %} 6748 6749 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 6750 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6751 match(Set dst (SubVB dst (LoadVector mem))); 6752 effect(TEMP src); 6753 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6754 ins_encode %{ 6755 int vector_len = 0; 6756 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6757 %} 6758 ins_pipe( pipe_slow ); 6759 %} 6760 6761 instruct vsub16B(vecX dst, vecX src) %{ 6762 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 6763 match(Set dst (SubVB dst src)); 6764 format %{ "psubb $dst,$src\t! sub packed16B" %} 6765 ins_encode %{ 6766 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6767 %} 6768 ins_pipe( pipe_slow ); 6769 %} 6770 6771 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 6772 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 6773 match(Set dst (SubVB src1 src2)); 6774 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6775 ins_encode %{ 6776 int vector_len = 0; 6777 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6778 %} 6779 ins_pipe( pipe_slow ); 6780 %} 6781 6782 instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 6783 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6784 match(Set dst (SubVB src1 src2)); 6785 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6786 ins_encode %{ 6787 int vector_len = 0; 6788 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6789 %} 6790 ins_pipe( pipe_slow ); 6791 %} 6792 6793 instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 6794 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6795 match(Set dst (SubVB dst src2)); 6796 effect(TEMP src1); 6797 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6798 ins_encode %{ 6799 int vector_len = 0; 6800 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6801 %} 6802 ins_pipe( pipe_slow ); 6803 %} 6804 6805 instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ 6806 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 6807 match(Set dst (SubVB src (LoadVector mem))); 6808 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6809 ins_encode %{ 6810 int vector_len = 0; 6811 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6812 %} 6813 ins_pipe( pipe_slow ); 6814 %} 6815 6816 instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ 6817 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6818 match(Set dst (SubVB src (LoadVector mem))); 6819 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6820 ins_encode %{ 6821 int vector_len = 0; 6822 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6823 %} 6824 ins_pipe( pipe_slow ); 6825 %} 6826 6827 instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 6828 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6829 match(Set dst (SubVB dst (LoadVector mem))); 6830 effect(TEMP src); 6831 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6832 ins_encode %{ 6833 int vector_len = 0; 6834 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6835 %} 6836 ins_pipe( pipe_slow ); 6837 %} 6838 6839 instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 6840 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 6841 match(Set dst (SubVB src1 src2)); 6842 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6843 ins_encode %{ 6844 int vector_len = 1; 6845 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6846 %} 6847 ins_pipe( pipe_slow ); 6848 %} 6849 6850 instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 6851 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6852 match(Set dst (SubVB src1 src2)); 6853 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6854 ins_encode %{ 6855 int vector_len = 1; 6856 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6857 %} 6858 ins_pipe( pipe_slow ); 6859 %} 6860 6861 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 6862 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 6863 match(Set dst (SubVB dst src2)); 6864 effect(TEMP src1); 6865 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6866 ins_encode %{ 6867 int vector_len = 1; 6868 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6869 %} 6870 ins_pipe( pipe_slow ); 6871 %} 6872 6873 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ 6874 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 6875 match(Set dst (SubVB src (LoadVector mem))); 6876 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6877 ins_encode %{ 6878 int vector_len = 1; 6879 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6880 %} 6881 ins_pipe( pipe_slow ); 6882 %} 6883 6884 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ 6885 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6886 match(Set dst (SubVB src (LoadVector mem))); 6887 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6888 ins_encode %{ 6889 int vector_len = 1; 6890 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6891 %} 6892 ins_pipe( pipe_slow ); 6893 %} 6894 6895 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 6896 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 6897 match(Set dst (SubVB dst (LoadVector mem))); 6898 effect(TEMP src); 6899 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6900 ins_encode %{ 6901 int vector_len = 1; 6902 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6903 %} 6904 ins_pipe( pipe_slow ); 6905 %} 6906 6907 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6908 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6909 match(Set dst (SubVB src1 src2)); 6910 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 6911 ins_encode %{ 6912 int vector_len = 2; 6913 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6914 %} 6915 ins_pipe( pipe_slow ); 6916 %} 6917 6918 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 6919 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6920 match(Set dst (SubVB src (LoadVector mem))); 6921 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 6922 ins_encode %{ 6923 int vector_len = 2; 6924 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6925 %} 6926 ins_pipe( pipe_slow ); 6927 %} 6928 6929 // Shorts/Chars vector sub 6930 instruct vsub2S(vecS dst, vecS src) %{ 6931 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6932 match(Set dst (SubVS dst src)); 6933 format %{ "psubw $dst,$src\t! sub packed2S" %} 6934 ins_encode %{ 6935 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6936 %} 6937 ins_pipe( pipe_slow ); 6938 %} 6939 6940 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 6941 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 6942 match(Set dst (SubVS src1 src2)); 6943 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6944 ins_encode %{ 6945 int vector_len = 0; 6946 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6947 %} 6948 ins_pipe( pipe_slow ); 6949 %} 6950 6951 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 6952 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 6953 match(Set dst (SubVS src1 src2)); 6954 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6955 ins_encode %{ 6956 int vector_len = 0; 6957 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6958 %} 6959 ins_pipe( pipe_slow ); 6960 %} 6961 6962 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 6963 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 6964 match(Set dst (SubVS dst src2)); 6965 effect(TEMP src1); 6966 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6967 ins_encode %{ 6968 int vector_len = 0; 6969 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6970 %} 6971 ins_pipe( pipe_slow ); 6972 %} 6973 6974 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 6975 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 6976 match(Set dst (SubVS src (LoadVector mem))); 6977 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6978 ins_encode %{ 6979 int vector_len = 0; 6980 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6981 %} 6982 ins_pipe( pipe_slow ); 6983 %} 6984 6985 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 6986 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 6987 match(Set dst (SubVS src (LoadVector mem))); 6988 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6989 ins_encode %{ 6990 int vector_len = 0; 6991 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6992 %} 6993 ins_pipe( pipe_slow ); 6994 %} 6995 6996 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 6997 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 6998 match(Set dst (SubVS dst (LoadVector mem))); 6999 effect(TEMP src); 7000 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7001 ins_encode %{ 7002 int vector_len = 0; 7003 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7004 %} 7005 ins_pipe( pipe_slow ); 7006 %} 7007 7008 instruct vsub4S(vecD dst, vecD src) %{ 7009 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7010 match(Set dst (SubVS dst src)); 7011 format %{ "psubw $dst,$src\t! sub packed4S" %} 7012 ins_encode %{ 7013 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7014 %} 7015 ins_pipe( pipe_slow ); 7016 %} 7017 7018 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7019 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7020 match(Set dst (SubVS src1 src2)); 7021 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7022 ins_encode %{ 7023 int vector_len = 0; 7024 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7025 %} 7026 ins_pipe( pipe_slow ); 7027 %} 7028 7029 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7030 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7031 match(Set dst (SubVS src1 src2)); 7032 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7033 ins_encode %{ 7034 int vector_len = 0; 7035 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7036 %} 7037 ins_pipe( pipe_slow ); 7038 %} 7039 7040 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7041 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7042 match(Set dst (SubVS dst src2)); 7043 effect(TEMP src1); 7044 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7045 ins_encode %{ 7046 int vector_len = 0; 7047 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7048 %} 7049 ins_pipe( pipe_slow ); 7050 %} 7051 7052 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7053 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7054 match(Set dst (SubVS src (LoadVector mem))); 7055 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7056 ins_encode %{ 7057 int vector_len = 0; 7058 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7059 %} 7060 ins_pipe( pipe_slow ); 7061 %} 7062 7063 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7064 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7065 match(Set dst (SubVS src (LoadVector mem))); 7066 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7067 ins_encode %{ 7068 int vector_len = 0; 7069 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7070 %} 7071 ins_pipe( pipe_slow ); 7072 %} 7073 7074 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7075 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7076 match(Set dst (SubVS dst (LoadVector mem))); 7077 effect(TEMP src); 7078 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7079 ins_encode %{ 7080 int vector_len = 0; 7081 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7082 %} 7083 ins_pipe( pipe_slow ); 7084 %} 7085 7086 instruct vsub8S(vecX dst, vecX src) %{ 7087 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7088 match(Set dst (SubVS dst src)); 7089 format %{ "psubw $dst,$src\t! sub packed8S" %} 7090 ins_encode %{ 7091 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7092 %} 7093 ins_pipe( pipe_slow ); 7094 %} 7095 7096 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7097 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7098 match(Set dst (SubVS src1 src2)); 7099 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7100 ins_encode %{ 7101 int vector_len = 0; 7102 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7103 %} 7104 ins_pipe( pipe_slow ); 7105 %} 7106 7107 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7108 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7109 match(Set dst (SubVS src1 src2)); 7110 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7111 ins_encode %{ 7112 int vector_len = 0; 7113 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7114 %} 7115 ins_pipe( pipe_slow ); 7116 %} 7117 7118 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7119 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7120 match(Set dst (SubVS dst src2)); 7121 effect(TEMP src1); 7122 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7123 ins_encode %{ 7124 int vector_len = 0; 7125 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7126 %} 7127 ins_pipe( pipe_slow ); 7128 %} 7129 7130 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7131 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7132 match(Set dst (SubVS src (LoadVector mem))); 7133 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7134 ins_encode %{ 7135 int vector_len = 0; 7136 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7137 %} 7138 ins_pipe( pipe_slow ); 7139 %} 7140 7141 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7142 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7143 match(Set dst (SubVS src (LoadVector mem))); 7144 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7145 ins_encode %{ 7146 int vector_len = 0; 7147 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7148 %} 7149 ins_pipe( pipe_slow ); 7150 %} 7151 7152 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7153 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7154 match(Set dst (SubVS dst (LoadVector mem))); 7155 effect(TEMP src); 7156 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7157 ins_encode %{ 7158 int vector_len = 0; 7159 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7160 %} 7161 ins_pipe( pipe_slow ); 7162 %} 7163 7164 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7165 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7166 match(Set dst (SubVS src1 src2)); 7167 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7168 ins_encode %{ 7169 int vector_len = 1; 7170 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7171 %} 7172 ins_pipe( pipe_slow ); 7173 %} 7174 7175 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7176 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7177 match(Set dst (SubVS src1 src2)); 7178 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7179 ins_encode %{ 7180 int vector_len = 1; 7181 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7182 %} 7183 ins_pipe( pipe_slow ); 7184 %} 7185 7186 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7187 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7188 match(Set dst (SubVS dst src2)); 7189 effect(TEMP src1); 7190 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7191 ins_encode %{ 7192 int vector_len = 1; 7193 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7194 %} 7195 ins_pipe( pipe_slow ); 7196 %} 7197 7198 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7199 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7200 match(Set dst (SubVS src (LoadVector mem))); 7201 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7202 ins_encode %{ 7203 int vector_len = 1; 7204 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7205 %} 7206 ins_pipe( pipe_slow ); 7207 %} 7208 7209 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7210 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7211 match(Set dst (SubVS src (LoadVector mem))); 7212 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7213 ins_encode %{ 7214 int vector_len = 1; 7215 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7216 %} 7217 ins_pipe( pipe_slow ); 7218 %} 7219 7220 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7221 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7222 match(Set dst (SubVS dst (LoadVector mem))); 7223 effect(TEMP src); 7224 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7225 ins_encode %{ 7226 int vector_len = 1; 7227 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7228 %} 7229 ins_pipe( pipe_slow ); 7230 %} 7231 7232 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7233 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7234 match(Set dst (SubVS src1 src2)); 7235 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 7236 ins_encode %{ 7237 int vector_len = 2; 7238 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7239 %} 7240 ins_pipe( pipe_slow ); 7241 %} 7242 7243 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 7244 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7245 match(Set dst (SubVS src (LoadVector mem))); 7246 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 7247 ins_encode %{ 7248 int vector_len = 2; 7249 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7250 %} 7251 ins_pipe( pipe_slow ); 7252 %} 7253 7254 // Integers vector sub 7255 instruct vsub2I(vecD dst, vecD src) %{ 7256 predicate(n->as_Vector()->length() == 2); 7257 match(Set dst (SubVI dst src)); 7258 format %{ "psubd $dst,$src\t! sub packed2I" %} 7259 ins_encode %{ 7260 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7261 %} 7262 ins_pipe( pipe_slow ); 7263 %} 7264 7265 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 7266 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7267 match(Set dst (SubVI src1 src2)); 7268 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 7269 ins_encode %{ 7270 int vector_len = 0; 7271 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7272 %} 7273 ins_pipe( pipe_slow ); 7274 %} 7275 7276 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 7277 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7278 match(Set dst (SubVI src (LoadVector mem))); 7279 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 7280 ins_encode %{ 7281 int vector_len = 0; 7282 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7283 %} 7284 ins_pipe( pipe_slow ); 7285 %} 7286 7287 instruct vsub4I(vecX dst, vecX src) %{ 7288 predicate(n->as_Vector()->length() == 4); 7289 match(Set dst (SubVI dst src)); 7290 format %{ "psubd $dst,$src\t! sub packed4I" %} 7291 ins_encode %{ 7292 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7293 %} 7294 ins_pipe( pipe_slow ); 7295 %} 7296 7297 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 7298 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7299 match(Set dst (SubVI src1 src2)); 7300 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 7301 ins_encode %{ 7302 int vector_len = 0; 7303 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7304 %} 7305 ins_pipe( pipe_slow ); 7306 %} 7307 7308 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 7309 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7310 match(Set dst (SubVI src (LoadVector mem))); 7311 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 7312 ins_encode %{ 7313 int vector_len = 0; 7314 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7315 %} 7316 ins_pipe( pipe_slow ); 7317 %} 7318 7319 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 7320 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7321 match(Set dst (SubVI src1 src2)); 7322 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 7323 ins_encode %{ 7324 int vector_len = 1; 7325 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7326 %} 7327 ins_pipe( pipe_slow ); 7328 %} 7329 7330 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 7331 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7332 match(Set dst (SubVI src (LoadVector mem))); 7333 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 7334 ins_encode %{ 7335 int vector_len = 1; 7336 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7337 %} 7338 ins_pipe( pipe_slow ); 7339 %} 7340 7341 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7342 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7343 match(Set dst (SubVI src1 src2)); 7344 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 7345 ins_encode %{ 7346 int vector_len = 2; 7347 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7348 %} 7349 ins_pipe( pipe_slow ); 7350 %} 7351 7352 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 7353 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7354 match(Set dst (SubVI src (LoadVector mem))); 7355 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 7356 ins_encode %{ 7357 int vector_len = 2; 7358 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7359 %} 7360 ins_pipe( pipe_slow ); 7361 %} 7362 7363 // Longs vector sub 7364 instruct vsub2L(vecX dst, vecX src) %{ 7365 predicate(n->as_Vector()->length() == 2); 7366 match(Set dst (SubVL dst src)); 7367 format %{ "psubq $dst,$src\t! sub packed2L" %} 7368 ins_encode %{ 7369 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 7370 %} 7371 ins_pipe( pipe_slow ); 7372 %} 7373 7374 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 7375 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7376 match(Set dst (SubVL src1 src2)); 7377 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 7378 ins_encode %{ 7379 int vector_len = 0; 7380 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7381 %} 7382 ins_pipe( pipe_slow ); 7383 %} 7384 7385 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 7386 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7387 match(Set dst (SubVL src (LoadVector mem))); 7388 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 7389 ins_encode %{ 7390 int vector_len = 0; 7391 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7392 %} 7393 ins_pipe( pipe_slow ); 7394 %} 7395 7396 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 7397 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7398 match(Set dst (SubVL src1 src2)); 7399 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 7400 ins_encode %{ 7401 int vector_len = 1; 7402 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7403 %} 7404 ins_pipe( pipe_slow ); 7405 %} 7406 7407 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 7408 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7409 match(Set dst (SubVL src (LoadVector mem))); 7410 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 7411 ins_encode %{ 7412 int vector_len = 1; 7413 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7414 %} 7415 ins_pipe( pipe_slow ); 7416 %} 7417 7418 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7419 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7420 match(Set dst (SubVL src1 src2)); 7421 format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} 7422 ins_encode %{ 7423 int vector_len = 2; 7424 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7425 %} 7426 ins_pipe( pipe_slow ); 7427 %} 7428 7429 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 7430 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7431 match(Set dst (SubVL src (LoadVector mem))); 7432 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 7433 ins_encode %{ 7434 int vector_len = 2; 7435 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7436 %} 7437 ins_pipe( pipe_slow ); 7438 %} 7439 7440 // Floats vector sub 7441 instruct vsub2F(vecD dst, vecD src) %{ 7442 predicate(n->as_Vector()->length() == 2); 7443 match(Set dst (SubVF dst src)); 7444 format %{ "subps $dst,$src\t! sub packed2F" %} 7445 ins_encode %{ 7446 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7447 %} 7448 ins_pipe( pipe_slow ); 7449 %} 7450 7451 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 7452 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7453 match(Set dst (SubVF src1 src2)); 7454 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 7455 ins_encode %{ 7456 int vector_len = 0; 7457 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7458 %} 7459 ins_pipe( pipe_slow ); 7460 %} 7461 7462 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 7463 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7464 match(Set dst (SubVF src (LoadVector mem))); 7465 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 7466 ins_encode %{ 7467 int vector_len = 0; 7468 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7469 %} 7470 ins_pipe( pipe_slow ); 7471 %} 7472 7473 instruct vsub4F(vecX dst, vecX src) %{ 7474 predicate(n->as_Vector()->length() == 4); 7475 match(Set dst (SubVF dst src)); 7476 format %{ "subps $dst,$src\t! sub packed4F" %} 7477 ins_encode %{ 7478 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7479 %} 7480 ins_pipe( pipe_slow ); 7481 %} 7482 7483 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 7484 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7485 match(Set dst (SubVF src1 src2)); 7486 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 7487 ins_encode %{ 7488 int vector_len = 0; 7489 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7490 %} 7491 ins_pipe( pipe_slow ); 7492 %} 7493 7494 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7495 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7496 match(Set dst (SubVF src (LoadVector mem))); 7497 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7498 ins_encode %{ 7499 int vector_len = 0; 7500 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7501 %} 7502 ins_pipe( pipe_slow ); 7503 %} 7504 7505 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7506 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7507 match(Set dst (SubVF src1 src2)); 7508 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7509 ins_encode %{ 7510 int vector_len = 1; 7511 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7512 %} 7513 ins_pipe( pipe_slow ); 7514 %} 7515 7516 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7517 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7518 match(Set dst (SubVF src (LoadVector mem))); 7519 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 7520 ins_encode %{ 7521 int vector_len = 1; 7522 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7523 %} 7524 ins_pipe( pipe_slow ); 7525 %} 7526 7527 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7528 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7529 match(Set dst (SubVF src1 src2)); 7530 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 7531 ins_encode %{ 7532 int vector_len = 2; 7533 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7534 %} 7535 ins_pipe( pipe_slow ); 7536 %} 7537 7538 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 7539 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7540 match(Set dst (SubVF src (LoadVector mem))); 7541 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 7542 ins_encode %{ 7543 int vector_len = 2; 7544 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7545 %} 7546 ins_pipe( pipe_slow ); 7547 %} 7548 7549 // Doubles vector sub 7550 instruct vsub2D(vecX dst, vecX src) %{ 7551 predicate(n->as_Vector()->length() == 2); 7552 match(Set dst (SubVD dst src)); 7553 format %{ "subpd $dst,$src\t! sub packed2D" %} 7554 ins_encode %{ 7555 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 7556 %} 7557 ins_pipe( pipe_slow ); 7558 %} 7559 7560 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 7561 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7562 match(Set dst (SubVD src1 src2)); 7563 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 7564 ins_encode %{ 7565 int vector_len = 0; 7566 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7567 %} 7568 ins_pipe( pipe_slow ); 7569 %} 7570 7571 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 7572 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7573 match(Set dst (SubVD src (LoadVector mem))); 7574 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 7575 ins_encode %{ 7576 int vector_len = 0; 7577 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7578 %} 7579 ins_pipe( pipe_slow ); 7580 %} 7581 7582 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7583 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7584 match(Set dst (SubVD src1 src2)); 7585 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7586 ins_encode %{ 7587 int vector_len = 1; 7588 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7589 %} 7590 ins_pipe( pipe_slow ); 7591 %} 7592 7593 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7594 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7595 match(Set dst (SubVD src (LoadVector mem))); 7596 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7597 ins_encode %{ 7598 int vector_len = 1; 7599 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7600 %} 7601 ins_pipe( pipe_slow ); 7602 %} 7603 7604 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7605 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7606 match(Set dst (SubVD src1 src2)); 7607 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7608 ins_encode %{ 7609 int vector_len = 2; 7610 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7611 %} 7612 ins_pipe( pipe_slow ); 7613 %} 7614 7615 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7616 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7617 match(Set dst (SubVD src (LoadVector mem))); 7618 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} 7619 ins_encode %{ 7620 int vector_len = 2; 7621 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7622 %} 7623 ins_pipe( pipe_slow ); 7624 %} 7625 7626 // --------------------------------- MUL -------------------------------------- 7627 7628 // Shorts/Chars vector mul 7629 instruct vmul2S(vecS dst, vecS src) %{ 7630 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7631 match(Set dst (MulVS dst src)); 7632 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7633 ins_encode %{ 7634 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7635 %} 7636 ins_pipe( pipe_slow ); 7637 %} 7638 7639 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7640 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7641 match(Set dst (MulVS src1 src2)); 7642 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7643 ins_encode %{ 7644 int vector_len = 0; 7645 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7646 %} 7647 ins_pipe( pipe_slow ); 7648 %} 7649 7650 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7651 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7652 match(Set dst (MulVS src1 src2)); 7653 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7654 ins_encode %{ 7655 int vector_len = 0; 7656 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7657 %} 7658 ins_pipe( pipe_slow ); 7659 %} 7660 7661 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 7662 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7663 match(Set dst (MulVS dst src2)); 7664 effect(TEMP src1); 7665 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7666 ins_encode %{ 7667 int vector_len = 0; 7668 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7669 %} 7670 ins_pipe( pipe_slow ); 7671 %} 7672 7673 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7674 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7675 match(Set dst (MulVS src (LoadVector mem))); 7676 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7677 ins_encode %{ 7678 int vector_len = 0; 7679 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7680 %} 7681 ins_pipe( pipe_slow ); 7682 %} 7683 7684 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7685 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7686 match(Set dst (MulVS src (LoadVector mem))); 7687 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7688 ins_encode %{ 7689 int vector_len = 0; 7690 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7691 %} 7692 ins_pipe( pipe_slow ); 7693 %} 7694 7695 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7696 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7697 match(Set dst (MulVS dst (LoadVector mem))); 7698 effect(TEMP src); 7699 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7700 ins_encode %{ 7701 int vector_len = 0; 7702 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7703 %} 7704 ins_pipe( pipe_slow ); 7705 %} 7706 7707 instruct vmul4S(vecD dst, vecD src) %{ 7708 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7709 match(Set dst (MulVS dst src)); 7710 format %{ "pmullw $dst,$src\t! mul packed4S" %} 7711 ins_encode %{ 7712 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7713 %} 7714 ins_pipe( pipe_slow ); 7715 %} 7716 7717 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7718 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7719 match(Set dst (MulVS src1 src2)); 7720 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7721 ins_encode %{ 7722 int vector_len = 0; 7723 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7724 %} 7725 ins_pipe( pipe_slow ); 7726 %} 7727 7728 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7729 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7730 match(Set dst (MulVS src1 src2)); 7731 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7732 ins_encode %{ 7733 int vector_len = 0; 7734 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7735 %} 7736 ins_pipe( pipe_slow ); 7737 %} 7738 7739 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7740 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7741 match(Set dst (MulVS dst src2)); 7742 effect(TEMP src1); 7743 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7744 ins_encode %{ 7745 int vector_len = 0; 7746 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7747 %} 7748 ins_pipe( pipe_slow ); 7749 %} 7750 7751 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7752 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7753 match(Set dst (MulVS src (LoadVector mem))); 7754 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7755 ins_encode %{ 7756 int vector_len = 0; 7757 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7758 %} 7759 ins_pipe( pipe_slow ); 7760 %} 7761 7762 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7763 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7764 match(Set dst (MulVS src (LoadVector mem))); 7765 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7766 ins_encode %{ 7767 int vector_len = 0; 7768 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7769 %} 7770 ins_pipe( pipe_slow ); 7771 %} 7772 7773 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7774 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7775 match(Set dst (MulVS dst (LoadVector mem))); 7776 effect(TEMP src); 7777 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7778 ins_encode %{ 7779 int vector_len = 0; 7780 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7781 %} 7782 ins_pipe( pipe_slow ); 7783 %} 7784 7785 instruct vmul8S(vecX dst, vecX src) %{ 7786 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7787 match(Set dst (MulVS dst src)); 7788 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7789 ins_encode %{ 7790 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7791 %} 7792 ins_pipe( pipe_slow ); 7793 %} 7794 7795 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7796 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7797 match(Set dst (MulVS src1 src2)); 7798 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7799 ins_encode %{ 7800 int vector_len = 0; 7801 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7802 %} 7803 ins_pipe( pipe_slow ); 7804 %} 7805 7806 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7807 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7808 match(Set dst (MulVS src1 src2)); 7809 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7810 ins_encode %{ 7811 int vector_len = 0; 7812 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7813 %} 7814 ins_pipe( pipe_slow ); 7815 %} 7816 7817 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7818 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7819 match(Set dst (MulVS dst src2)); 7820 effect(TEMP src1); 7821 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7822 ins_encode %{ 7823 int vector_len = 0; 7824 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7825 %} 7826 ins_pipe( pipe_slow ); 7827 %} 7828 7829 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7830 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7831 match(Set dst (MulVS src (LoadVector mem))); 7832 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7833 ins_encode %{ 7834 int vector_len = 0; 7835 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7836 %} 7837 ins_pipe( pipe_slow ); 7838 %} 7839 7840 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7841 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7842 match(Set dst (MulVS src (LoadVector mem))); 7843 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7844 ins_encode %{ 7845 int vector_len = 0; 7846 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7847 %} 7848 ins_pipe( pipe_slow ); 7849 %} 7850 7851 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7852 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7853 match(Set dst (MulVS dst (LoadVector mem))); 7854 effect(TEMP src); 7855 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7856 ins_encode %{ 7857 int vector_len = 0; 7858 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7859 %} 7860 ins_pipe( pipe_slow ); 7861 %} 7862 7863 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7864 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7865 match(Set dst (MulVS src1 src2)); 7866 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7867 ins_encode %{ 7868 int vector_len = 1; 7869 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7870 %} 7871 ins_pipe( pipe_slow ); 7872 %} 7873 7874 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7875 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7876 match(Set dst (MulVS src1 src2)); 7877 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7878 ins_encode %{ 7879 int vector_len = 1; 7880 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7881 %} 7882 ins_pipe( pipe_slow ); 7883 %} 7884 7885 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7886 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7887 match(Set dst (MulVS dst src2)); 7888 effect(TEMP src1); 7889 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7890 ins_encode %{ 7891 int vector_len = 1; 7892 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7893 %} 7894 ins_pipe( pipe_slow ); 7895 %} 7896 7897 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7898 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7899 match(Set dst (MulVS src (LoadVector mem))); 7900 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7901 ins_encode %{ 7902 int vector_len = 1; 7903 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7904 %} 7905 ins_pipe( pipe_slow ); 7906 %} 7907 7908 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7909 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7910 match(Set dst (MulVS src (LoadVector mem))); 7911 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7912 ins_encode %{ 7913 int vector_len = 1; 7914 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7915 %} 7916 ins_pipe( pipe_slow ); 7917 %} 7918 7919 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7920 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7921 match(Set dst (MulVS dst (LoadVector mem))); 7922 effect(TEMP src); 7923 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7924 ins_encode %{ 7925 int vector_len = 1; 7926 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7927 %} 7928 ins_pipe( pipe_slow ); 7929 %} 7930 7931 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7932 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7933 match(Set dst (MulVS src1 src2)); 7934 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 7935 ins_encode %{ 7936 int vector_len = 2; 7937 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7938 %} 7939 ins_pipe( pipe_slow ); 7940 %} 7941 7942 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7943 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7944 match(Set dst (MulVS src (LoadVector mem))); 7945 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7946 ins_encode %{ 7947 int vector_len = 2; 7948 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7949 %} 7950 ins_pipe( pipe_slow ); 7951 %} 7952 7953 // Integers vector mul (sse4_1) 7954 instruct vmul2I(vecD dst, vecD src) %{ 7955 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7956 match(Set dst (MulVI dst src)); 7957 format %{ "pmulld $dst,$src\t! mul packed2I" %} 7958 ins_encode %{ 7959 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7960 %} 7961 ins_pipe( pipe_slow ); 7962 %} 7963 7964 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 7965 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7966 match(Set dst (MulVI src1 src2)); 7967 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 7968 ins_encode %{ 7969 int vector_len = 0; 7970 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7971 %} 7972 ins_pipe( pipe_slow ); 7973 %} 7974 7975 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 7976 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7977 match(Set dst (MulVI src (LoadVector mem))); 7978 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 7979 ins_encode %{ 7980 int vector_len = 0; 7981 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7982 %} 7983 ins_pipe( pipe_slow ); 7984 %} 7985 7986 instruct vmul4I(vecX dst, vecX src) %{ 7987 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7988 match(Set dst (MulVI dst src)); 7989 format %{ "pmulld $dst,$src\t! mul packed4I" %} 7990 ins_encode %{ 7991 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7992 %} 7993 ins_pipe( pipe_slow ); 7994 %} 7995 7996 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 7997 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7998 match(Set dst (MulVI src1 src2)); 7999 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 8000 ins_encode %{ 8001 int vector_len = 0; 8002 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8003 %} 8004 ins_pipe( pipe_slow ); 8005 %} 8006 8007 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 8008 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8009 match(Set dst (MulVI src (LoadVector mem))); 8010 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 8011 ins_encode %{ 8012 int vector_len = 0; 8013 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8014 %} 8015 ins_pipe( pipe_slow ); 8016 %} 8017 8018 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 8019 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 8020 match(Set dst (MulVL src1 src2)); 8021 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 8022 ins_encode %{ 8023 int vector_len = 0; 8024 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8025 %} 8026 ins_pipe( pipe_slow ); 8027 %} 8028 8029 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 8030 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 8031 match(Set dst (MulVL src (LoadVector mem))); 8032 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 8033 ins_encode %{ 8034 int vector_len = 0; 8035 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8036 %} 8037 ins_pipe( pipe_slow ); 8038 %} 8039 8040 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 8041 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 8042 match(Set dst (MulVL src1 src2)); 8043 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 8044 ins_encode %{ 8045 int vector_len = 1; 8046 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8047 %} 8048 ins_pipe( pipe_slow ); 8049 %} 8050 8051 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 8052 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 8053 match(Set dst (MulVL src (LoadVector mem))); 8054 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 8055 ins_encode %{ 8056 int vector_len = 1; 8057 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8058 %} 8059 ins_pipe( pipe_slow ); 8060 %} 8061 8062 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8063 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8064 match(Set dst (MulVL src1 src2)); 8065 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 8066 ins_encode %{ 8067 int vector_len = 2; 8068 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8069 %} 8070 ins_pipe( pipe_slow ); 8071 %} 8072 8073 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 8074 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8075 match(Set dst (MulVL src (LoadVector mem))); 8076 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 8077 ins_encode %{ 8078 int vector_len = 2; 8079 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8080 %} 8081 ins_pipe( pipe_slow ); 8082 %} 8083 8084 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 8085 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8086 match(Set dst (MulVI src1 src2)); 8087 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 8088 ins_encode %{ 8089 int vector_len = 1; 8090 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8091 %} 8092 ins_pipe( pipe_slow ); 8093 %} 8094 8095 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 8096 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8097 match(Set dst (MulVI src (LoadVector mem))); 8098 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 8099 ins_encode %{ 8100 int vector_len = 1; 8101 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8102 %} 8103 ins_pipe( pipe_slow ); 8104 %} 8105 8106 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8107 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8108 match(Set dst (MulVI src1 src2)); 8109 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 8110 ins_encode %{ 8111 int vector_len = 2; 8112 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8113 %} 8114 ins_pipe( pipe_slow ); 8115 %} 8116 8117 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 8118 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8119 match(Set dst (MulVI src (LoadVector mem))); 8120 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 8121 ins_encode %{ 8122 int vector_len = 2; 8123 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8124 %} 8125 ins_pipe( pipe_slow ); 8126 %} 8127 8128 // Floats vector mul 8129 instruct vmul2F(vecD dst, vecD src) %{ 8130 predicate(n->as_Vector()->length() == 2); 8131 match(Set dst (MulVF dst src)); 8132 format %{ "mulps $dst,$src\t! mul packed2F" %} 8133 ins_encode %{ 8134 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8135 %} 8136 ins_pipe( pipe_slow ); 8137 %} 8138 8139 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 8140 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8141 match(Set dst (MulVF src1 src2)); 8142 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 8143 ins_encode %{ 8144 int vector_len = 0; 8145 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8146 %} 8147 ins_pipe( pipe_slow ); 8148 %} 8149 8150 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 8151 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8152 match(Set dst (MulVF src (LoadVector mem))); 8153 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 8154 ins_encode %{ 8155 int vector_len = 0; 8156 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8157 %} 8158 ins_pipe( pipe_slow ); 8159 %} 8160 8161 instruct vmul4F(vecX dst, vecX src) %{ 8162 predicate(n->as_Vector()->length() == 4); 8163 match(Set dst (MulVF dst src)); 8164 format %{ "mulps $dst,$src\t! mul packed4F" %} 8165 ins_encode %{ 8166 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8167 %} 8168 ins_pipe( pipe_slow ); 8169 %} 8170 8171 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 8172 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8173 match(Set dst (MulVF src1 src2)); 8174 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 8175 ins_encode %{ 8176 int vector_len = 0; 8177 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8178 %} 8179 ins_pipe( pipe_slow ); 8180 %} 8181 8182 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 8183 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8184 match(Set dst (MulVF src (LoadVector mem))); 8185 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 8186 ins_encode %{ 8187 int vector_len = 0; 8188 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8189 %} 8190 ins_pipe( pipe_slow ); 8191 %} 8192 8193 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 8194 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8195 match(Set dst (MulVF src1 src2)); 8196 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 8197 ins_encode %{ 8198 int vector_len = 1; 8199 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8200 %} 8201 ins_pipe( pipe_slow ); 8202 %} 8203 8204 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 8205 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8206 match(Set dst (MulVF src (LoadVector mem))); 8207 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 8208 ins_encode %{ 8209 int vector_len = 1; 8210 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8211 %} 8212 ins_pipe( pipe_slow ); 8213 %} 8214 8215 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8216 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8217 match(Set dst (MulVF src1 src2)); 8218 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 8219 ins_encode %{ 8220 int vector_len = 2; 8221 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8222 %} 8223 ins_pipe( pipe_slow ); 8224 %} 8225 8226 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 8227 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8228 match(Set dst (MulVF src (LoadVector mem))); 8229 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 8230 ins_encode %{ 8231 int vector_len = 2; 8232 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8233 %} 8234 ins_pipe( pipe_slow ); 8235 %} 8236 8237 // Doubles vector mul 8238 instruct vmul2D(vecX dst, vecX src) %{ 8239 predicate(n->as_Vector()->length() == 2); 8240 match(Set dst (MulVD dst src)); 8241 format %{ "mulpd $dst,$src\t! mul packed2D" %} 8242 ins_encode %{ 8243 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 8244 %} 8245 ins_pipe( pipe_slow ); 8246 %} 8247 8248 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 8249 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8250 match(Set dst (MulVD src1 src2)); 8251 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 8252 ins_encode %{ 8253 int vector_len = 0; 8254 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8255 %} 8256 ins_pipe( pipe_slow ); 8257 %} 8258 8259 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 8260 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8261 match(Set dst (MulVD src (LoadVector mem))); 8262 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 8263 ins_encode %{ 8264 int vector_len = 0; 8265 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8266 %} 8267 ins_pipe( pipe_slow ); 8268 %} 8269 8270 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 8271 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8272 match(Set dst (MulVD src1 src2)); 8273 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 8274 ins_encode %{ 8275 int vector_len = 1; 8276 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8277 %} 8278 ins_pipe( pipe_slow ); 8279 %} 8280 8281 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 8282 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8283 match(Set dst (MulVD src (LoadVector mem))); 8284 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 8285 ins_encode %{ 8286 int vector_len = 1; 8287 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8288 %} 8289 ins_pipe( pipe_slow ); 8290 %} 8291 8292 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8293 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8294 match(Set dst (MulVD src1 src2)); 8295 format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} 8296 ins_encode %{ 8297 int vector_len = 2; 8298 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8299 %} 8300 ins_pipe( pipe_slow ); 8301 %} 8302 8303 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 8304 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8305 match(Set dst (MulVD src (LoadVector mem))); 8306 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 8307 ins_encode %{ 8308 int vector_len = 2; 8309 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8310 %} 8311 ins_pipe( pipe_slow ); 8312 %} 8313 8314 instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 8315 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8); 8316 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 8317 effect(TEMP dst, USE src1, USE src2); 8318 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 8319 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 8320 %} 8321 ins_encode %{ 8322 int vector_len = 1; 8323 int cond = (Assembler::Condition)($copnd$$cmpcode); 8324 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 8325 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 8326 %} 8327 ins_pipe( pipe_slow ); 8328 %} 8329 8330 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 8331 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4); 8332 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 8333 effect(TEMP dst, USE src1, USE src2); 8334 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 8335 "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 8336 %} 8337 ins_encode %{ 8338 int vector_len = 1; 8339 int cond = (Assembler::Condition)($copnd$$cmpcode); 8340 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 8341 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 8342 %} 8343 ins_pipe( pipe_slow ); 8344 %} 8345 8346 // --------------------------------- DIV -------------------------------------- 8347 8348 // Floats vector div 8349 instruct vdiv2F(vecD dst, vecD src) %{ 8350 predicate(n->as_Vector()->length() == 2); 8351 match(Set dst (DivVF dst src)); 8352 format %{ "divps $dst,$src\t! div packed2F" %} 8353 ins_encode %{ 8354 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8355 %} 8356 ins_pipe( pipe_slow ); 8357 %} 8358 8359 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 8360 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8361 match(Set dst (DivVF src1 src2)); 8362 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 8363 ins_encode %{ 8364 int vector_len = 0; 8365 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8366 %} 8367 ins_pipe( pipe_slow ); 8368 %} 8369 8370 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 8371 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8372 match(Set dst (DivVF src (LoadVector mem))); 8373 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 8374 ins_encode %{ 8375 int vector_len = 0; 8376 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8377 %} 8378 ins_pipe( pipe_slow ); 8379 %} 8380 8381 instruct vdiv4F(vecX dst, vecX src) %{ 8382 predicate(n->as_Vector()->length() == 4); 8383 match(Set dst (DivVF dst src)); 8384 format %{ "divps $dst,$src\t! div packed4F" %} 8385 ins_encode %{ 8386 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8387 %} 8388 ins_pipe( pipe_slow ); 8389 %} 8390 8391 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 8392 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8393 match(Set dst (DivVF src1 src2)); 8394 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 8395 ins_encode %{ 8396 int vector_len = 0; 8397 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8398 %} 8399 ins_pipe( pipe_slow ); 8400 %} 8401 8402 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 8403 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8404 match(Set dst (DivVF src (LoadVector mem))); 8405 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 8406 ins_encode %{ 8407 int vector_len = 0; 8408 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8409 %} 8410 ins_pipe( pipe_slow ); 8411 %} 8412 8413 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 8414 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8415 match(Set dst (DivVF src1 src2)); 8416 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 8417 ins_encode %{ 8418 int vector_len = 1; 8419 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8420 %} 8421 ins_pipe( pipe_slow ); 8422 %} 8423 8424 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 8425 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8426 match(Set dst (DivVF src (LoadVector mem))); 8427 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 8428 ins_encode %{ 8429 int vector_len = 1; 8430 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8431 %} 8432 ins_pipe( pipe_slow ); 8433 %} 8434 8435 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8436 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8437 match(Set dst (DivVF src1 src2)); 8438 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 8439 ins_encode %{ 8440 int vector_len = 2; 8441 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8442 %} 8443 ins_pipe( pipe_slow ); 8444 %} 8445 8446 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 8447 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8448 match(Set dst (DivVF src (LoadVector mem))); 8449 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 8450 ins_encode %{ 8451 int vector_len = 2; 8452 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8453 %} 8454 ins_pipe( pipe_slow ); 8455 %} 8456 8457 // Doubles vector div 8458 instruct vdiv2D(vecX dst, vecX src) %{ 8459 predicate(n->as_Vector()->length() == 2); 8460 match(Set dst (DivVD dst src)); 8461 format %{ "divpd $dst,$src\t! div packed2D" %} 8462 ins_encode %{ 8463 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 8464 %} 8465 ins_pipe( pipe_slow ); 8466 %} 8467 8468 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 8469 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8470 match(Set dst (DivVD src1 src2)); 8471 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 8472 ins_encode %{ 8473 int vector_len = 0; 8474 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8475 %} 8476 ins_pipe( pipe_slow ); 8477 %} 8478 8479 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 8480 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8481 match(Set dst (DivVD src (LoadVector mem))); 8482 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 8483 ins_encode %{ 8484 int vector_len = 0; 8485 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8486 %} 8487 ins_pipe( pipe_slow ); 8488 %} 8489 8490 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 8491 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8492 match(Set dst (DivVD src1 src2)); 8493 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 8494 ins_encode %{ 8495 int vector_len = 1; 8496 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8497 %} 8498 ins_pipe( pipe_slow ); 8499 %} 8500 8501 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8502 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8503 match(Set dst (DivVD src (LoadVector mem))); 8504 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8505 ins_encode %{ 8506 int vector_len = 1; 8507 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8508 %} 8509 ins_pipe( pipe_slow ); 8510 %} 8511 8512 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8513 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8514 match(Set dst (DivVD src1 src2)); 8515 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8516 ins_encode %{ 8517 int vector_len = 2; 8518 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8519 %} 8520 ins_pipe( pipe_slow ); 8521 %} 8522 8523 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8524 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8525 match(Set dst (DivVD src (LoadVector mem))); 8526 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8527 ins_encode %{ 8528 int vector_len = 2; 8529 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8530 %} 8531 ins_pipe( pipe_slow ); 8532 %} 8533 8534 // ------------------------------ Shift --------------------------------------- 8535 8536 // Left and right shift count vectors are the same on x86 8537 // (only lowest bits of xmm reg are used for count). 8538 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8539 match(Set dst (LShiftCntV cnt)); 8540 match(Set dst (RShiftCntV cnt)); 8541 format %{ "movd $dst,$cnt\t! load shift count" %} 8542 ins_encode %{ 8543 __ movdl($dst$$XMMRegister, $cnt$$Register); 8544 %} 8545 ins_pipe( pipe_slow ); 8546 %} 8547 8548 // --------------------------------- Sqrt -------------------------------------- 8549 8550 // Floating point vector sqrt 8551 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8552 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8553 match(Set dst (SqrtVD src)); 8554 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 8555 ins_encode %{ 8556 int vector_len = 0; 8557 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8558 %} 8559 ins_pipe( pipe_slow ); 8560 %} 8561 8562 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8563 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8564 match(Set dst (SqrtVD (LoadVector mem))); 8565 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8566 ins_encode %{ 8567 int vector_len = 0; 8568 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8569 %} 8570 ins_pipe( pipe_slow ); 8571 %} 8572 8573 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8574 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8575 match(Set dst (SqrtVD src)); 8576 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 8577 ins_encode %{ 8578 int vector_len = 1; 8579 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8580 %} 8581 ins_pipe( pipe_slow ); 8582 %} 8583 8584 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8585 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8586 match(Set dst (SqrtVD (LoadVector mem))); 8587 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8588 ins_encode %{ 8589 int vector_len = 1; 8590 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8591 %} 8592 ins_pipe( pipe_slow ); 8593 %} 8594 8595 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8596 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8597 match(Set dst (SqrtVD src)); 8598 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 8599 ins_encode %{ 8600 int vector_len = 2; 8601 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8602 %} 8603 ins_pipe( pipe_slow ); 8604 %} 8605 8606 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8607 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8608 match(Set dst (SqrtVD (LoadVector mem))); 8609 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 8610 ins_encode %{ 8611 int vector_len = 2; 8612 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8613 %} 8614 ins_pipe( pipe_slow ); 8615 %} 8616 8617 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 8618 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8619 match(Set dst (SqrtVF src)); 8620 format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %} 8621 ins_encode %{ 8622 int vector_len = 0; 8623 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8624 %} 8625 ins_pipe( pipe_slow ); 8626 %} 8627 8628 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 8629 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8630 match(Set dst (SqrtVF (LoadVector mem))); 8631 format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} 8632 ins_encode %{ 8633 int vector_len = 0; 8634 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8635 %} 8636 ins_pipe( pipe_slow ); 8637 %} 8638 8639 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 8640 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8641 match(Set dst (SqrtVF src)); 8642 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 8643 ins_encode %{ 8644 int vector_len = 0; 8645 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8646 %} 8647 ins_pipe( pipe_slow ); 8648 %} 8649 8650 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 8651 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8652 match(Set dst (SqrtVF (LoadVector mem))); 8653 format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} 8654 ins_encode %{ 8655 int vector_len = 0; 8656 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8657 %} 8658 ins_pipe( pipe_slow ); 8659 %} 8660 8661 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 8662 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8663 match(Set dst (SqrtVF src)); 8664 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 8665 ins_encode %{ 8666 int vector_len = 1; 8667 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8668 %} 8669 ins_pipe( pipe_slow ); 8670 %} 8671 8672 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 8673 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8674 match(Set dst (SqrtVF (LoadVector mem))); 8675 format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %} 8676 ins_encode %{ 8677 int vector_len = 1; 8678 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8679 %} 8680 ins_pipe( pipe_slow ); 8681 %} 8682 8683 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 8684 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8685 match(Set dst (SqrtVF src)); 8686 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8687 ins_encode %{ 8688 int vector_len = 2; 8689 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8690 %} 8691 ins_pipe( pipe_slow ); 8692 %} 8693 8694 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8695 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8696 match(Set dst (SqrtVF (LoadVector mem))); 8697 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8698 ins_encode %{ 8699 int vector_len = 2; 8700 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8701 %} 8702 ins_pipe( pipe_slow ); 8703 %} 8704 8705 // ------------------------------ LeftShift ----------------------------------- 8706 8707 // Shorts/Chars vector left shift 8708 instruct vsll2S(vecS dst, vecS shift) %{ 8709 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8710 match(Set dst (LShiftVS dst shift)); 8711 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8712 ins_encode %{ 8713 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8714 %} 8715 ins_pipe( pipe_slow ); 8716 %} 8717 8718 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8719 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8720 match(Set dst (LShiftVS dst shift)); 8721 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8722 ins_encode %{ 8723 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8724 %} 8725 ins_pipe( pipe_slow ); 8726 %} 8727 8728 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 8729 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8730 match(Set dst (LShiftVS src shift)); 8731 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8732 ins_encode %{ 8733 int vector_len = 0; 8734 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8735 %} 8736 ins_pipe( pipe_slow ); 8737 %} 8738 8739 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 8740 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8741 match(Set dst (LShiftVS src shift)); 8742 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8743 ins_encode %{ 8744 int vector_len = 0; 8745 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8746 %} 8747 ins_pipe( pipe_slow ); 8748 %} 8749 8750 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 8751 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8752 match(Set dst (LShiftVS dst shift)); 8753 effect(TEMP src); 8754 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8755 ins_encode %{ 8756 int vector_len = 0; 8757 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8758 %} 8759 ins_pipe( pipe_slow ); 8760 %} 8761 8762 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 8763 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8764 match(Set dst (LShiftVS src shift)); 8765 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8766 ins_encode %{ 8767 int vector_len = 0; 8768 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8769 %} 8770 ins_pipe( pipe_slow ); 8771 %} 8772 8773 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 8774 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8775 match(Set dst (LShiftVS src shift)); 8776 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8777 ins_encode %{ 8778 int vector_len = 0; 8779 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8780 %} 8781 ins_pipe( pipe_slow ); 8782 %} 8783 8784 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 8785 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8786 match(Set dst (LShiftVS dst shift)); 8787 effect(TEMP src); 8788 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8789 ins_encode %{ 8790 int vector_len = 0; 8791 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8792 %} 8793 ins_pipe( pipe_slow ); 8794 %} 8795 8796 instruct vsll4S(vecD dst, vecS shift) %{ 8797 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8798 match(Set dst (LShiftVS dst shift)); 8799 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8800 ins_encode %{ 8801 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8802 %} 8803 ins_pipe( pipe_slow ); 8804 %} 8805 8806 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8807 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8808 match(Set dst (LShiftVS dst shift)); 8809 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8810 ins_encode %{ 8811 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8812 %} 8813 ins_pipe( pipe_slow ); 8814 %} 8815 8816 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 8817 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8818 match(Set dst (LShiftVS src shift)); 8819 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8820 ins_encode %{ 8821 int vector_len = 0; 8822 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8823 %} 8824 ins_pipe( pipe_slow ); 8825 %} 8826 8827 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 8828 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 8829 match(Set dst (LShiftVS src shift)); 8830 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8831 ins_encode %{ 8832 int vector_len = 0; 8833 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8834 %} 8835 ins_pipe( pipe_slow ); 8836 %} 8837 8838 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 8839 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 8840 match(Set dst (LShiftVS dst shift)); 8841 effect(TEMP src); 8842 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8843 ins_encode %{ 8844 int vector_len = 0; 8845 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8846 %} 8847 ins_pipe( pipe_slow ); 8848 %} 8849 8850 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 8851 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8852 match(Set dst (LShiftVS src shift)); 8853 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8854 ins_encode %{ 8855 int vector_len = 0; 8856 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8857 %} 8858 ins_pipe( pipe_slow ); 8859 %} 8860 8861 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 8862 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 8863 match(Set dst (LShiftVS src shift)); 8864 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8865 ins_encode %{ 8866 int vector_len = 0; 8867 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8868 %} 8869 ins_pipe( pipe_slow ); 8870 %} 8871 8872 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 8873 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 8874 match(Set dst (LShiftVS dst shift)); 8875 effect(TEMP src); 8876 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8877 ins_encode %{ 8878 int vector_len = 0; 8879 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8880 %} 8881 ins_pipe( pipe_slow ); 8882 %} 8883 8884 instruct vsll8S(vecX dst, vecS shift) %{ 8885 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8886 match(Set dst (LShiftVS dst shift)); 8887 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8888 ins_encode %{ 8889 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8890 %} 8891 ins_pipe( pipe_slow ); 8892 %} 8893 8894 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 8895 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8896 match(Set dst (LShiftVS dst shift)); 8897 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8898 ins_encode %{ 8899 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8900 %} 8901 ins_pipe( pipe_slow ); 8902 %} 8903 8904 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 8905 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8906 match(Set dst (LShiftVS src shift)); 8907 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8908 ins_encode %{ 8909 int vector_len = 0; 8910 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8911 %} 8912 ins_pipe( pipe_slow ); 8913 %} 8914 8915 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 8916 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8917 match(Set dst (LShiftVS src shift)); 8918 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8919 ins_encode %{ 8920 int vector_len = 0; 8921 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8922 %} 8923 ins_pipe( pipe_slow ); 8924 %} 8925 8926 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 8927 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8928 match(Set dst (LShiftVS dst shift)); 8929 effect(TEMP src); 8930 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8931 ins_encode %{ 8932 int vector_len = 0; 8933 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8934 %} 8935 ins_pipe( pipe_slow ); 8936 %} 8937 8938 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 8939 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8940 match(Set dst (LShiftVS src shift)); 8941 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8942 ins_encode %{ 8943 int vector_len = 0; 8944 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8945 %} 8946 ins_pipe( pipe_slow ); 8947 %} 8948 8949 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 8950 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8951 match(Set dst (LShiftVS src shift)); 8952 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8953 ins_encode %{ 8954 int vector_len = 0; 8955 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8956 %} 8957 ins_pipe( pipe_slow ); 8958 %} 8959 8960 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 8961 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8962 match(Set dst (LShiftVS dst shift)); 8963 effect(TEMP src); 8964 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8965 ins_encode %{ 8966 int vector_len = 0; 8967 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8968 %} 8969 ins_pipe( pipe_slow ); 8970 %} 8971 8972 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 8973 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8974 match(Set dst (LShiftVS src shift)); 8975 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8976 ins_encode %{ 8977 int vector_len = 1; 8978 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8979 %} 8980 ins_pipe( pipe_slow ); 8981 %} 8982 8983 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 8984 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8985 match(Set dst (LShiftVS src shift)); 8986 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8987 ins_encode %{ 8988 int vector_len = 1; 8989 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8990 %} 8991 ins_pipe( pipe_slow ); 8992 %} 8993 8994 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 8995 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8996 match(Set dst (LShiftVS dst shift)); 8997 effect(TEMP src); 8998 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8999 ins_encode %{ 9000 int vector_len = 1; 9001 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9002 %} 9003 ins_pipe( pipe_slow ); 9004 %} 9005 9006 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9007 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9008 match(Set dst (LShiftVS src shift)); 9009 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9010 ins_encode %{ 9011 int vector_len = 1; 9012 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9013 %} 9014 ins_pipe( pipe_slow ); 9015 %} 9016 9017 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9018 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9019 match(Set dst (LShiftVS src shift)); 9020 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9021 ins_encode %{ 9022 int vector_len = 1; 9023 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9024 %} 9025 ins_pipe( pipe_slow ); 9026 %} 9027 9028 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9029 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9030 match(Set dst (LShiftVS dst shift)); 9031 effect(TEMP src); 9032 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9033 ins_encode %{ 9034 int vector_len = 1; 9035 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9036 %} 9037 ins_pipe( pipe_slow ); 9038 %} 9039 9040 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9041 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9042 match(Set dst (LShiftVS src shift)); 9043 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9044 ins_encode %{ 9045 int vector_len = 2; 9046 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9047 %} 9048 ins_pipe( pipe_slow ); 9049 %} 9050 9051 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9052 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9053 match(Set dst (LShiftVS src shift)); 9054 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9055 ins_encode %{ 9056 int vector_len = 2; 9057 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9058 %} 9059 ins_pipe( pipe_slow ); 9060 %} 9061 9062 // Integers vector left shift 9063 instruct vsll2I(vecD dst, vecS shift) %{ 9064 predicate(n->as_Vector()->length() == 2); 9065 match(Set dst (LShiftVI dst shift)); 9066 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9067 ins_encode %{ 9068 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9069 %} 9070 ins_pipe( pipe_slow ); 9071 %} 9072 9073 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 9074 predicate(n->as_Vector()->length() == 2); 9075 match(Set dst (LShiftVI dst shift)); 9076 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9077 ins_encode %{ 9078 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9079 %} 9080 ins_pipe( pipe_slow ); 9081 %} 9082 9083 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 9084 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9085 match(Set dst (LShiftVI src shift)); 9086 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 9087 ins_encode %{ 9088 int vector_len = 0; 9089 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9090 %} 9091 ins_pipe( pipe_slow ); 9092 %} 9093 9094 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9095 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9096 match(Set dst (LShiftVI src shift)); 9097 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 9098 ins_encode %{ 9099 int vector_len = 0; 9100 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9101 %} 9102 ins_pipe( pipe_slow ); 9103 %} 9104 9105 instruct vsll4I(vecX dst, vecS shift) %{ 9106 predicate(n->as_Vector()->length() == 4); 9107 match(Set dst (LShiftVI dst shift)); 9108 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9109 ins_encode %{ 9110 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9111 %} 9112 ins_pipe( pipe_slow ); 9113 %} 9114 9115 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 9116 predicate(n->as_Vector()->length() == 4); 9117 match(Set dst (LShiftVI dst shift)); 9118 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9119 ins_encode %{ 9120 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9121 %} 9122 ins_pipe( pipe_slow ); 9123 %} 9124 9125 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 9126 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9127 match(Set dst (LShiftVI src shift)); 9128 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9129 ins_encode %{ 9130 int vector_len = 0; 9131 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9132 %} 9133 ins_pipe( pipe_slow ); 9134 %} 9135 9136 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9137 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9138 match(Set dst (LShiftVI src shift)); 9139 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9140 ins_encode %{ 9141 int vector_len = 0; 9142 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9143 %} 9144 ins_pipe( pipe_slow ); 9145 %} 9146 9147 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 9148 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9149 match(Set dst (LShiftVI src shift)); 9150 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9151 ins_encode %{ 9152 int vector_len = 1; 9153 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9154 %} 9155 ins_pipe( pipe_slow ); 9156 %} 9157 9158 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9159 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9160 match(Set dst (LShiftVI src shift)); 9161 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9162 ins_encode %{ 9163 int vector_len = 1; 9164 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9165 %} 9166 ins_pipe( pipe_slow ); 9167 %} 9168 9169 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9170 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9171 match(Set dst (LShiftVI src shift)); 9172 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 9173 ins_encode %{ 9174 int vector_len = 2; 9175 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9176 %} 9177 ins_pipe( pipe_slow ); 9178 %} 9179 9180 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9181 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9182 match(Set dst (LShiftVI src shift)); 9183 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 9184 ins_encode %{ 9185 int vector_len = 2; 9186 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9187 %} 9188 ins_pipe( pipe_slow ); 9189 %} 9190 9191 // Longs vector left shift 9192 instruct vsll2L(vecX dst, vecS shift) %{ 9193 predicate(n->as_Vector()->length() == 2); 9194 match(Set dst (LShiftVL dst shift)); 9195 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9196 ins_encode %{ 9197 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 9198 %} 9199 ins_pipe( pipe_slow ); 9200 %} 9201 9202 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 9203 predicate(n->as_Vector()->length() == 2); 9204 match(Set dst (LShiftVL dst shift)); 9205 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9206 ins_encode %{ 9207 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 9208 %} 9209 ins_pipe( pipe_slow ); 9210 %} 9211 9212 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 9213 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9214 match(Set dst (LShiftVL src shift)); 9215 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9216 ins_encode %{ 9217 int vector_len = 0; 9218 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9219 %} 9220 ins_pipe( pipe_slow ); 9221 %} 9222 9223 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9224 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9225 match(Set dst (LShiftVL src shift)); 9226 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9227 ins_encode %{ 9228 int vector_len = 0; 9229 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9230 %} 9231 ins_pipe( pipe_slow ); 9232 %} 9233 9234 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 9235 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9236 match(Set dst (LShiftVL src shift)); 9237 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9238 ins_encode %{ 9239 int vector_len = 1; 9240 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9241 %} 9242 ins_pipe( pipe_slow ); 9243 %} 9244 9245 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9246 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9247 match(Set dst (LShiftVL src shift)); 9248 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9249 ins_encode %{ 9250 int vector_len = 1; 9251 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9252 %} 9253 ins_pipe( pipe_slow ); 9254 %} 9255 9256 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9257 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9258 match(Set dst (LShiftVL src shift)); 9259 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 9260 ins_encode %{ 9261 int vector_len = 2; 9262 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9263 %} 9264 ins_pipe( pipe_slow ); 9265 %} 9266 9267 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9268 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9269 match(Set dst (LShiftVL src shift)); 9270 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 9271 ins_encode %{ 9272 int vector_len = 2; 9273 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9274 %} 9275 ins_pipe( pipe_slow ); 9276 %} 9277 9278 // ----------------------- LogicalRightShift ----------------------------------- 9279 9280 // Shorts vector logical right shift produces incorrect Java result 9281 // for negative data because java code convert short value into int with 9282 // sign extension before a shift. But char vectors are fine since chars are 9283 // unsigned values. 9284 9285 instruct vsrl2S(vecS dst, vecS shift) %{ 9286 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9287 match(Set dst (URShiftVS dst shift)); 9288 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 9289 ins_encode %{ 9290 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9291 %} 9292 ins_pipe( pipe_slow ); 9293 %} 9294 9295 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 9296 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9297 match(Set dst (URShiftVS dst shift)); 9298 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 9299 ins_encode %{ 9300 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9301 %} 9302 ins_pipe( pipe_slow ); 9303 %} 9304 9305 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 9306 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9307 match(Set dst (URShiftVS src shift)); 9308 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9309 ins_encode %{ 9310 int vector_len = 0; 9311 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9312 %} 9313 ins_pipe( pipe_slow ); 9314 %} 9315 9316 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 9317 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9318 match(Set dst (URShiftVS src shift)); 9319 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9320 ins_encode %{ 9321 int vector_len = 0; 9322 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9323 %} 9324 ins_pipe( pipe_slow ); 9325 %} 9326 9327 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 9328 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9329 match(Set dst (URShiftVS dst shift)); 9330 effect(TEMP src); 9331 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9332 ins_encode %{ 9333 int vector_len = 0; 9334 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9335 %} 9336 ins_pipe( pipe_slow ); 9337 %} 9338 9339 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 9340 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9341 match(Set dst (URShiftVS src shift)); 9342 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9343 ins_encode %{ 9344 int vector_len = 0; 9345 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9346 %} 9347 ins_pipe( pipe_slow ); 9348 %} 9349 9350 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9351 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9352 match(Set dst (URShiftVS src shift)); 9353 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9354 ins_encode %{ 9355 int vector_len = 0; 9356 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9357 %} 9358 ins_pipe( pipe_slow ); 9359 %} 9360 9361 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9362 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9363 match(Set dst (URShiftVS dst shift)); 9364 effect(TEMP src); 9365 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9366 ins_encode %{ 9367 int vector_len = 0; 9368 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9369 %} 9370 ins_pipe( pipe_slow ); 9371 %} 9372 9373 instruct vsrl4S(vecD dst, vecS shift) %{ 9374 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9375 match(Set dst (URShiftVS dst shift)); 9376 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9377 ins_encode %{ 9378 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9379 %} 9380 ins_pipe( pipe_slow ); 9381 %} 9382 9383 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 9384 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9385 match(Set dst (URShiftVS dst shift)); 9386 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9387 ins_encode %{ 9388 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9389 %} 9390 ins_pipe( pipe_slow ); 9391 %} 9392 9393 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9394 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9395 match(Set dst (URShiftVS src shift)); 9396 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9397 ins_encode %{ 9398 int vector_len = 0; 9399 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9400 %} 9401 ins_pipe( pipe_slow ); 9402 %} 9403 9404 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9405 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9406 match(Set dst (URShiftVS src shift)); 9407 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9408 ins_encode %{ 9409 int vector_len = 0; 9410 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9411 %} 9412 ins_pipe( pipe_slow ); 9413 %} 9414 9415 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9416 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9417 match(Set dst (URShiftVS dst shift)); 9418 effect(TEMP src); 9419 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9420 ins_encode %{ 9421 int vector_len = 0; 9422 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9423 %} 9424 ins_pipe( pipe_slow ); 9425 %} 9426 9427 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9428 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9429 match(Set dst (URShiftVS src shift)); 9430 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9431 ins_encode %{ 9432 int vector_len = 0; 9433 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9434 %} 9435 ins_pipe( pipe_slow ); 9436 %} 9437 9438 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9439 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9440 match(Set dst (URShiftVS src shift)); 9441 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9442 ins_encode %{ 9443 int vector_len = 0; 9444 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9445 %} 9446 ins_pipe( pipe_slow ); 9447 %} 9448 9449 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9450 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9451 match(Set dst (URShiftVS dst shift)); 9452 effect(TEMP src); 9453 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9454 ins_encode %{ 9455 int vector_len = 0; 9456 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9457 %} 9458 ins_pipe( pipe_slow ); 9459 %} 9460 9461 instruct vsrl8S(vecX dst, vecS shift) %{ 9462 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9463 match(Set dst (URShiftVS dst shift)); 9464 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9465 ins_encode %{ 9466 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9467 %} 9468 ins_pipe( pipe_slow ); 9469 %} 9470 9471 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 9472 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9473 match(Set dst (URShiftVS dst shift)); 9474 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9475 ins_encode %{ 9476 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9477 %} 9478 ins_pipe( pipe_slow ); 9479 %} 9480 9481 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9482 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9483 match(Set dst (URShiftVS src shift)); 9484 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9485 ins_encode %{ 9486 int vector_len = 0; 9487 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9488 %} 9489 ins_pipe( pipe_slow ); 9490 %} 9491 9492 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9493 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9494 match(Set dst (URShiftVS src shift)); 9495 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9496 ins_encode %{ 9497 int vector_len = 0; 9498 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9499 %} 9500 ins_pipe( pipe_slow ); 9501 %} 9502 9503 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9504 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9505 match(Set dst (URShiftVS dst shift)); 9506 effect(TEMP src); 9507 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9508 ins_encode %{ 9509 int vector_len = 0; 9510 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9511 %} 9512 ins_pipe( pipe_slow ); 9513 %} 9514 9515 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9516 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9517 match(Set dst (URShiftVS src shift)); 9518 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9519 ins_encode %{ 9520 int vector_len = 0; 9521 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9522 %} 9523 ins_pipe( pipe_slow ); 9524 %} 9525 9526 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9527 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9528 match(Set dst (URShiftVS src shift)); 9529 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9530 ins_encode %{ 9531 int vector_len = 0; 9532 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9533 %} 9534 ins_pipe( pipe_slow ); 9535 %} 9536 9537 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9538 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9539 match(Set dst (URShiftVS dst shift)); 9540 effect(TEMP src); 9541 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9542 ins_encode %{ 9543 int vector_len = 0; 9544 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9545 %} 9546 ins_pipe( pipe_slow ); 9547 %} 9548 9549 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9550 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9551 match(Set dst (URShiftVS src shift)); 9552 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9553 ins_encode %{ 9554 int vector_len = 1; 9555 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9556 %} 9557 ins_pipe( pipe_slow ); 9558 %} 9559 9560 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9561 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9562 match(Set dst (URShiftVS src shift)); 9563 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9564 ins_encode %{ 9565 int vector_len = 1; 9566 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9567 %} 9568 ins_pipe( pipe_slow ); 9569 %} 9570 9571 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9572 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9573 match(Set dst (URShiftVS dst shift)); 9574 effect(TEMP src); 9575 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9576 ins_encode %{ 9577 int vector_len = 1; 9578 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9579 %} 9580 ins_pipe( pipe_slow ); 9581 %} 9582 9583 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9584 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9585 match(Set dst (URShiftVS src shift)); 9586 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9587 ins_encode %{ 9588 int vector_len = 1; 9589 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9590 %} 9591 ins_pipe( pipe_slow ); 9592 %} 9593 9594 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9595 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9596 match(Set dst (URShiftVS src shift)); 9597 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9598 ins_encode %{ 9599 int vector_len = 1; 9600 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9601 %} 9602 ins_pipe( pipe_slow ); 9603 %} 9604 9605 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9606 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9607 match(Set dst (URShiftVS dst shift)); 9608 effect(TEMP src); 9609 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9610 ins_encode %{ 9611 int vector_len = 1; 9612 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9613 %} 9614 ins_pipe( pipe_slow ); 9615 %} 9616 9617 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9618 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9619 match(Set dst (URShiftVS src shift)); 9620 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9621 ins_encode %{ 9622 int vector_len = 2; 9623 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9624 %} 9625 ins_pipe( pipe_slow ); 9626 %} 9627 9628 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9629 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9630 match(Set dst (URShiftVS src shift)); 9631 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9632 ins_encode %{ 9633 int vector_len = 2; 9634 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9635 %} 9636 ins_pipe( pipe_slow ); 9637 %} 9638 9639 // Integers vector logical right shift 9640 instruct vsrl2I(vecD dst, vecS shift) %{ 9641 predicate(n->as_Vector()->length() == 2); 9642 match(Set dst (URShiftVI dst shift)); 9643 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9644 ins_encode %{ 9645 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9646 %} 9647 ins_pipe( pipe_slow ); 9648 %} 9649 9650 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 9651 predicate(n->as_Vector()->length() == 2); 9652 match(Set dst (URShiftVI dst shift)); 9653 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9654 ins_encode %{ 9655 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9656 %} 9657 ins_pipe( pipe_slow ); 9658 %} 9659 9660 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 9661 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9662 match(Set dst (URShiftVI src shift)); 9663 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9664 ins_encode %{ 9665 int vector_len = 0; 9666 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9667 %} 9668 ins_pipe( pipe_slow ); 9669 %} 9670 9671 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9672 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9673 match(Set dst (URShiftVI src shift)); 9674 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9675 ins_encode %{ 9676 int vector_len = 0; 9677 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9678 %} 9679 ins_pipe( pipe_slow ); 9680 %} 9681 9682 instruct vsrl4I(vecX dst, vecS shift) %{ 9683 predicate(n->as_Vector()->length() == 4); 9684 match(Set dst (URShiftVI dst shift)); 9685 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9686 ins_encode %{ 9687 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9688 %} 9689 ins_pipe( pipe_slow ); 9690 %} 9691 9692 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 9693 predicate(n->as_Vector()->length() == 4); 9694 match(Set dst (URShiftVI dst shift)); 9695 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9696 ins_encode %{ 9697 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9698 %} 9699 ins_pipe( pipe_slow ); 9700 %} 9701 9702 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 9703 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9704 match(Set dst (URShiftVI src shift)); 9705 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9706 ins_encode %{ 9707 int vector_len = 0; 9708 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9709 %} 9710 ins_pipe( pipe_slow ); 9711 %} 9712 9713 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9714 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9715 match(Set dst (URShiftVI src shift)); 9716 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9717 ins_encode %{ 9718 int vector_len = 0; 9719 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9720 %} 9721 ins_pipe( pipe_slow ); 9722 %} 9723 9724 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 9725 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9726 match(Set dst (URShiftVI src shift)); 9727 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9728 ins_encode %{ 9729 int vector_len = 1; 9730 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9731 %} 9732 ins_pipe( pipe_slow ); 9733 %} 9734 9735 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9736 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9737 match(Set dst (URShiftVI src shift)); 9738 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9739 ins_encode %{ 9740 int vector_len = 1; 9741 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9742 %} 9743 ins_pipe( pipe_slow ); 9744 %} 9745 9746 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9747 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9748 match(Set dst (URShiftVI src shift)); 9749 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9750 ins_encode %{ 9751 int vector_len = 2; 9752 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9753 %} 9754 ins_pipe( pipe_slow ); 9755 %} 9756 9757 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9758 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9759 match(Set dst (URShiftVI src shift)); 9760 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9761 ins_encode %{ 9762 int vector_len = 2; 9763 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9764 %} 9765 ins_pipe( pipe_slow ); 9766 %} 9767 9768 // Longs vector logical right shift 9769 instruct vsrl2L(vecX dst, vecS shift) %{ 9770 predicate(n->as_Vector()->length() == 2); 9771 match(Set dst (URShiftVL dst shift)); 9772 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9773 ins_encode %{ 9774 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 9775 %} 9776 ins_pipe( pipe_slow ); 9777 %} 9778 9779 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 9780 predicate(n->as_Vector()->length() == 2); 9781 match(Set dst (URShiftVL dst shift)); 9782 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9783 ins_encode %{ 9784 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 9785 %} 9786 ins_pipe( pipe_slow ); 9787 %} 9788 9789 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 9790 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9791 match(Set dst (URShiftVL src shift)); 9792 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9793 ins_encode %{ 9794 int vector_len = 0; 9795 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9796 %} 9797 ins_pipe( pipe_slow ); 9798 %} 9799 9800 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9801 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9802 match(Set dst (URShiftVL src shift)); 9803 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9804 ins_encode %{ 9805 int vector_len = 0; 9806 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9807 %} 9808 ins_pipe( pipe_slow ); 9809 %} 9810 9811 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 9812 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9813 match(Set dst (URShiftVL src shift)); 9814 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9815 ins_encode %{ 9816 int vector_len = 1; 9817 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9818 %} 9819 ins_pipe( pipe_slow ); 9820 %} 9821 9822 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9823 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9824 match(Set dst (URShiftVL src shift)); 9825 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9826 ins_encode %{ 9827 int vector_len = 1; 9828 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9829 %} 9830 ins_pipe( pipe_slow ); 9831 %} 9832 9833 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9834 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9835 match(Set dst (URShiftVL src shift)); 9836 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9837 ins_encode %{ 9838 int vector_len = 2; 9839 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9840 %} 9841 ins_pipe( pipe_slow ); 9842 %} 9843 9844 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9845 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9846 match(Set dst (URShiftVL src shift)); 9847 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9848 ins_encode %{ 9849 int vector_len = 2; 9850 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9851 %} 9852 ins_pipe( pipe_slow ); 9853 %} 9854 9855 // ------------------- ArithmeticRightShift ----------------------------------- 9856 9857 // Shorts/Chars vector arithmetic right shift 9858 instruct vsra2S(vecS dst, vecS shift) %{ 9859 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9860 match(Set dst (RShiftVS dst shift)); 9861 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 9862 ins_encode %{ 9863 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9864 %} 9865 ins_pipe( pipe_slow ); 9866 %} 9867 9868 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 9869 predicate(n->as_Vector()->length() == 2); 9870 match(Set dst (RShiftVS dst shift)); 9871 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 9872 ins_encode %{ 9873 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9874 %} 9875 ins_pipe( pipe_slow ); 9876 %} 9877 9878 instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 9879 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9880 match(Set dst (RShiftVS src shift)); 9881 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9882 ins_encode %{ 9883 int vector_len = 0; 9884 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9885 %} 9886 ins_pipe( pipe_slow ); 9887 %} 9888 9889 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 9890 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9891 match(Set dst (RShiftVS src shift)); 9892 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9893 ins_encode %{ 9894 int vector_len = 0; 9895 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9896 %} 9897 ins_pipe( pipe_slow ); 9898 %} 9899 9900 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 9901 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9902 match(Set dst (RShiftVS dst shift)); 9903 effect(TEMP src); 9904 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9905 ins_encode %{ 9906 int vector_len = 0; 9907 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9908 %} 9909 ins_pipe( pipe_slow ); 9910 %} 9911 9912 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 9913 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9914 match(Set dst (RShiftVS src shift)); 9915 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9916 ins_encode %{ 9917 int vector_len = 0; 9918 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9919 %} 9920 ins_pipe( pipe_slow ); 9921 %} 9922 9923 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9924 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9925 match(Set dst (RShiftVS src shift)); 9926 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9927 ins_encode %{ 9928 int vector_len = 0; 9929 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9930 %} 9931 ins_pipe( pipe_slow ); 9932 %} 9933 9934 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9935 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9936 match(Set dst (RShiftVS dst shift)); 9937 effect(TEMP src); 9938 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9939 ins_encode %{ 9940 int vector_len = 0; 9941 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9942 %} 9943 ins_pipe( pipe_slow ); 9944 %} 9945 9946 instruct vsra4S(vecD dst, vecS shift) %{ 9947 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9948 match(Set dst (RShiftVS dst shift)); 9949 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9950 ins_encode %{ 9951 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9952 %} 9953 ins_pipe( pipe_slow ); 9954 %} 9955 9956 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 9957 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9958 match(Set dst (RShiftVS dst shift)); 9959 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9960 ins_encode %{ 9961 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9962 %} 9963 ins_pipe( pipe_slow ); 9964 %} 9965 9966 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9967 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9968 match(Set dst (RShiftVS src shift)); 9969 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9970 ins_encode %{ 9971 int vector_len = 0; 9972 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9973 %} 9974 ins_pipe( pipe_slow ); 9975 %} 9976 9977 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9978 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9979 match(Set dst (RShiftVS src shift)); 9980 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9981 ins_encode %{ 9982 int vector_len = 0; 9983 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9984 %} 9985 ins_pipe( pipe_slow ); 9986 %} 9987 9988 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9989 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9990 match(Set dst (RShiftVS dst shift)); 9991 effect(TEMP src); 9992 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9993 ins_encode %{ 9994 int vector_len = 0; 9995 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9996 %} 9997 ins_pipe( pipe_slow ); 9998 %} 9999 10000 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 10001 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 10002 match(Set dst (RShiftVS src shift)); 10003 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10004 ins_encode %{ 10005 int vector_len = 0; 10006 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10007 %} 10008 ins_pipe( pipe_slow ); 10009 %} 10010 10011 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 10012 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 10013 match(Set dst (RShiftVS src shift)); 10014 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10015 ins_encode %{ 10016 int vector_len = 0; 10017 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10018 %} 10019 ins_pipe( pipe_slow ); 10020 %} 10021 10022 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 10023 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 10024 match(Set dst (RShiftVS dst shift)); 10025 effect(TEMP src); 10026 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10027 ins_encode %{ 10028 int vector_len = 0; 10029 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10030 %} 10031 ins_pipe( pipe_slow ); 10032 %} 10033 10034 instruct vsra8S(vecX dst, vecS shift) %{ 10035 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 10036 match(Set dst (RShiftVS dst shift)); 10037 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 10038 ins_encode %{ 10039 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 10040 %} 10041 ins_pipe( pipe_slow ); 10042 %} 10043 10044 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 10045 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 10046 match(Set dst (RShiftVS dst shift)); 10047 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 10048 ins_encode %{ 10049 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 10050 %} 10051 ins_pipe( pipe_slow ); 10052 %} 10053 10054 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 10055 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 10056 match(Set dst (RShiftVS src shift)); 10057 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10058 ins_encode %{ 10059 int vector_len = 0; 10060 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10061 %} 10062 ins_pipe( pipe_slow ); 10063 %} 10064 10065 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 10066 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 10067 match(Set dst (RShiftVS src shift)); 10068 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10069 ins_encode %{ 10070 int vector_len = 0; 10071 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10072 %} 10073 ins_pipe( pipe_slow ); 10074 %} 10075 10076 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 10077 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 10078 match(Set dst (RShiftVS dst shift)); 10079 effect(TEMP src); 10080 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10081 ins_encode %{ 10082 int vector_len = 0; 10083 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10084 %} 10085 ins_pipe( pipe_slow ); 10086 %} 10087 10088 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 10089 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 10090 match(Set dst (RShiftVS src shift)); 10091 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10092 ins_encode %{ 10093 int vector_len = 0; 10094 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10095 %} 10096 ins_pipe( pipe_slow ); 10097 %} 10098 10099 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 10100 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 10101 match(Set dst (RShiftVS src shift)); 10102 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10103 ins_encode %{ 10104 int vector_len = 0; 10105 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10106 %} 10107 ins_pipe( pipe_slow ); 10108 %} 10109 10110 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 10111 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 10112 match(Set dst (RShiftVS dst shift)); 10113 effect(TEMP src); 10114 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10115 ins_encode %{ 10116 int vector_len = 0; 10117 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10118 %} 10119 ins_pipe( pipe_slow ); 10120 %} 10121 10122 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 10123 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 10124 match(Set dst (RShiftVS src shift)); 10125 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10126 ins_encode %{ 10127 int vector_len = 1; 10128 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10129 %} 10130 ins_pipe( pipe_slow ); 10131 %} 10132 10133 instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 10134 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 10135 match(Set dst (RShiftVS src shift)); 10136 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10137 ins_encode %{ 10138 int vector_len = 1; 10139 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10140 %} 10141 ins_pipe( pipe_slow ); 10142 %} 10143 10144 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 10145 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 10146 match(Set dst (RShiftVS dst shift)); 10147 effect(TEMP src); 10148 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10149 ins_encode %{ 10150 int vector_len = 1; 10151 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10152 %} 10153 ins_pipe( pipe_slow ); 10154 %} 10155 10156 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 10157 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 10158 match(Set dst (RShiftVS src shift)); 10159 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10160 ins_encode %{ 10161 int vector_len = 1; 10162 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10163 %} 10164 ins_pipe( pipe_slow ); 10165 %} 10166 10167 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 10168 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 10169 match(Set dst (RShiftVS src shift)); 10170 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10171 ins_encode %{ 10172 int vector_len = 1; 10173 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10174 %} 10175 ins_pipe( pipe_slow ); 10176 %} 10177 10178 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 10179 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 10180 match(Set dst (RShiftVS dst shift)); 10181 effect(TEMP src); 10182 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10183 ins_encode %{ 10184 int vector_len = 1; 10185 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10186 %} 10187 ins_pipe( pipe_slow ); 10188 %} 10189 10190 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ 10191 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 10192 match(Set dst (RShiftVS src shift)); 10193 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 10194 ins_encode %{ 10195 int vector_len = 2; 10196 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10197 %} 10198 ins_pipe( pipe_slow ); 10199 %} 10200 10201 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10202 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 10203 match(Set dst (RShiftVS src shift)); 10204 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 10205 ins_encode %{ 10206 int vector_len = 2; 10207 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10208 %} 10209 ins_pipe( pipe_slow ); 10210 %} 10211 10212 // Integers vector arithmetic right shift 10213 instruct vsra2I(vecD dst, vecS shift) %{ 10214 predicate(n->as_Vector()->length() == 2); 10215 match(Set dst (RShiftVI dst shift)); 10216 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10217 ins_encode %{ 10218 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10219 %} 10220 ins_pipe( pipe_slow ); 10221 %} 10222 10223 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 10224 predicate(n->as_Vector()->length() == 2); 10225 match(Set dst (RShiftVI dst shift)); 10226 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10227 ins_encode %{ 10228 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10229 %} 10230 ins_pipe( pipe_slow ); 10231 %} 10232 10233 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 10234 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10235 match(Set dst (RShiftVI src shift)); 10236 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10237 ins_encode %{ 10238 int vector_len = 0; 10239 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10240 %} 10241 ins_pipe( pipe_slow ); 10242 %} 10243 10244 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 10245 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10246 match(Set dst (RShiftVI src shift)); 10247 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10248 ins_encode %{ 10249 int vector_len = 0; 10250 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10251 %} 10252 ins_pipe( pipe_slow ); 10253 %} 10254 10255 instruct vsra4I(vecX dst, vecS shift) %{ 10256 predicate(n->as_Vector()->length() == 4); 10257 match(Set dst (RShiftVI dst shift)); 10258 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10259 ins_encode %{ 10260 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10261 %} 10262 ins_pipe( pipe_slow ); 10263 %} 10264 10265 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 10266 predicate(n->as_Vector()->length() == 4); 10267 match(Set dst (RShiftVI dst shift)); 10268 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10269 ins_encode %{ 10270 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10271 %} 10272 ins_pipe( pipe_slow ); 10273 %} 10274 10275 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 10276 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10277 match(Set dst (RShiftVI src shift)); 10278 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10279 ins_encode %{ 10280 int vector_len = 0; 10281 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10282 %} 10283 ins_pipe( pipe_slow ); 10284 %} 10285 10286 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 10287 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10288 match(Set dst (RShiftVI src shift)); 10289 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10290 ins_encode %{ 10291 int vector_len = 0; 10292 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10293 %} 10294 ins_pipe( pipe_slow ); 10295 %} 10296 10297 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 10298 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10299 match(Set dst (RShiftVI src shift)); 10300 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10301 ins_encode %{ 10302 int vector_len = 1; 10303 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10304 %} 10305 ins_pipe( pipe_slow ); 10306 %} 10307 10308 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 10309 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10310 match(Set dst (RShiftVI src shift)); 10311 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10312 ins_encode %{ 10313 int vector_len = 1; 10314 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10315 %} 10316 ins_pipe( pipe_slow ); 10317 %} 10318 10319 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ 10320 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10321 match(Set dst (RShiftVI src shift)); 10322 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10323 ins_encode %{ 10324 int vector_len = 2; 10325 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10326 %} 10327 ins_pipe( pipe_slow ); 10328 %} 10329 10330 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10331 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10332 match(Set dst (RShiftVI src shift)); 10333 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10334 ins_encode %{ 10335 int vector_len = 2; 10336 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10337 %} 10338 ins_pipe( pipe_slow ); 10339 %} 10340 10341 // There are no longs vector arithmetic right shift instructions. 10342 10343 10344 // --------------------------------- AND -------------------------------------- 10345 10346 instruct vand4B(vecS dst, vecS src) %{ 10347 predicate(n->as_Vector()->length_in_bytes() == 4); 10348 match(Set dst (AndV dst src)); 10349 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 10350 ins_encode %{ 10351 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10352 %} 10353 ins_pipe( pipe_slow ); 10354 %} 10355 10356 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 10357 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10358 match(Set dst (AndV src1 src2)); 10359 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 10360 ins_encode %{ 10361 int vector_len = 0; 10362 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10363 %} 10364 ins_pipe( pipe_slow ); 10365 %} 10366 10367 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 10368 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10369 match(Set dst (AndV src (LoadVector mem))); 10370 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 10371 ins_encode %{ 10372 int vector_len = 0; 10373 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10374 %} 10375 ins_pipe( pipe_slow ); 10376 %} 10377 10378 instruct vand8B(vecD dst, vecD src) %{ 10379 predicate(n->as_Vector()->length_in_bytes() == 8); 10380 match(Set dst (AndV dst src)); 10381 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 10382 ins_encode %{ 10383 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10384 %} 10385 ins_pipe( pipe_slow ); 10386 %} 10387 10388 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 10389 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10390 match(Set dst (AndV src1 src2)); 10391 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 10392 ins_encode %{ 10393 int vector_len = 0; 10394 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10395 %} 10396 ins_pipe( pipe_slow ); 10397 %} 10398 10399 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 10400 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10401 match(Set dst (AndV src (LoadVector mem))); 10402 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 10403 ins_encode %{ 10404 int vector_len = 0; 10405 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10406 %} 10407 ins_pipe( pipe_slow ); 10408 %} 10409 10410 instruct vand16B(vecX dst, vecX src) %{ 10411 predicate(n->as_Vector()->length_in_bytes() == 16); 10412 match(Set dst (AndV dst src)); 10413 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 10414 ins_encode %{ 10415 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10416 %} 10417 ins_pipe( pipe_slow ); 10418 %} 10419 10420 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 10421 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10422 match(Set dst (AndV src1 src2)); 10423 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 10424 ins_encode %{ 10425 int vector_len = 0; 10426 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10427 %} 10428 ins_pipe( pipe_slow ); 10429 %} 10430 10431 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 10432 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10433 match(Set dst (AndV src (LoadVector mem))); 10434 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 10435 ins_encode %{ 10436 int vector_len = 0; 10437 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10438 %} 10439 ins_pipe( pipe_slow ); 10440 %} 10441 10442 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 10443 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10444 match(Set dst (AndV src1 src2)); 10445 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 10446 ins_encode %{ 10447 int vector_len = 1; 10448 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10449 %} 10450 ins_pipe( pipe_slow ); 10451 %} 10452 10453 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 10454 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10455 match(Set dst (AndV src (LoadVector mem))); 10456 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 10457 ins_encode %{ 10458 int vector_len = 1; 10459 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10460 %} 10461 ins_pipe( pipe_slow ); 10462 %} 10463 10464 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10465 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10466 match(Set dst (AndV src1 src2)); 10467 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} 10468 ins_encode %{ 10469 int vector_len = 2; 10470 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10471 %} 10472 ins_pipe( pipe_slow ); 10473 %} 10474 10475 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ 10476 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10477 match(Set dst (AndV src (LoadVector mem))); 10478 format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %} 10479 ins_encode %{ 10480 int vector_len = 2; 10481 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10482 %} 10483 ins_pipe( pipe_slow ); 10484 %} 10485 10486 // --------------------------------- OR --------------------------------------- 10487 10488 instruct vor4B(vecS dst, vecS src) %{ 10489 predicate(n->as_Vector()->length_in_bytes() == 4); 10490 match(Set dst (OrV dst src)); 10491 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 10492 ins_encode %{ 10493 __ por($dst$$XMMRegister, $src$$XMMRegister); 10494 %} 10495 ins_pipe( pipe_slow ); 10496 %} 10497 10498 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 10499 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10500 match(Set dst (OrV src1 src2)); 10501 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 10502 ins_encode %{ 10503 int vector_len = 0; 10504 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10505 %} 10506 ins_pipe( pipe_slow ); 10507 %} 10508 10509 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 10510 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10511 match(Set dst (OrV src (LoadVector mem))); 10512 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 10513 ins_encode %{ 10514 int vector_len = 0; 10515 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10516 %} 10517 ins_pipe( pipe_slow ); 10518 %} 10519 10520 instruct vor8B(vecD dst, vecD src) %{ 10521 predicate(n->as_Vector()->length_in_bytes() == 8); 10522 match(Set dst (OrV dst src)); 10523 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 10524 ins_encode %{ 10525 __ por($dst$$XMMRegister, $src$$XMMRegister); 10526 %} 10527 ins_pipe( pipe_slow ); 10528 %} 10529 10530 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 10531 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10532 match(Set dst (OrV src1 src2)); 10533 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} 10534 ins_encode %{ 10535 int vector_len = 0; 10536 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10537 %} 10538 ins_pipe( pipe_slow ); 10539 %} 10540 10541 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ 10542 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10543 match(Set dst (OrV src (LoadVector mem))); 10544 format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} 10545 ins_encode %{ 10546 int vector_len = 0; 10547 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10548 %} 10549 ins_pipe( pipe_slow ); 10550 %} 10551 10552 instruct vor16B(vecX dst, vecX src) %{ 10553 predicate(n->as_Vector()->length_in_bytes() == 16); 10554 match(Set dst (OrV dst src)); 10555 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 10556 ins_encode %{ 10557 __ por($dst$$XMMRegister, $src$$XMMRegister); 10558 %} 10559 ins_pipe( pipe_slow ); 10560 %} 10561 10562 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10563 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10564 match(Set dst (OrV src1 src2)); 10565 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 10566 ins_encode %{ 10567 int vector_len = 0; 10568 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10569 %} 10570 ins_pipe( pipe_slow ); 10571 %} 10572 10573 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 10574 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10575 match(Set dst (OrV src (LoadVector mem))); 10576 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 10577 ins_encode %{ 10578 int vector_len = 0; 10579 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10580 %} 10581 ins_pipe( pipe_slow ); 10582 %} 10583 10584 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 10585 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10586 match(Set dst (OrV src1 src2)); 10587 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 10588 ins_encode %{ 10589 int vector_len = 1; 10590 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10591 %} 10592 ins_pipe( pipe_slow ); 10593 %} 10594 10595 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 10596 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10597 match(Set dst (OrV src (LoadVector mem))); 10598 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 10599 ins_encode %{ 10600 int vector_len = 1; 10601 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10602 %} 10603 ins_pipe( pipe_slow ); 10604 %} 10605 10606 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10607 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10608 match(Set dst (OrV src1 src2)); 10609 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 10610 ins_encode %{ 10611 int vector_len = 2; 10612 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10613 %} 10614 ins_pipe( pipe_slow ); 10615 %} 10616 10617 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 10618 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10619 match(Set dst (OrV src (LoadVector mem))); 10620 format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %} 10621 ins_encode %{ 10622 int vector_len = 2; 10623 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10624 %} 10625 ins_pipe( pipe_slow ); 10626 %} 10627 10628 // --------------------------------- XOR -------------------------------------- 10629 10630 instruct vxor4B(vecS dst, vecS src) %{ 10631 predicate(n->as_Vector()->length_in_bytes() == 4); 10632 match(Set dst (XorV dst src)); 10633 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 10634 ins_encode %{ 10635 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10636 %} 10637 ins_pipe( pipe_slow ); 10638 %} 10639 10640 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 10641 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10642 match(Set dst (XorV src1 src2)); 10643 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 10644 ins_encode %{ 10645 int vector_len = 0; 10646 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10647 %} 10648 ins_pipe( pipe_slow ); 10649 %} 10650 10651 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 10652 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10653 match(Set dst (XorV src (LoadVector mem))); 10654 format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} 10655 ins_encode %{ 10656 int vector_len = 0; 10657 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10658 %} 10659 ins_pipe( pipe_slow ); 10660 %} 10661 10662 instruct vxor8B(vecD dst, vecD src) %{ 10663 predicate(n->as_Vector()->length_in_bytes() == 8); 10664 match(Set dst (XorV dst src)); 10665 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 10666 ins_encode %{ 10667 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10668 %} 10669 ins_pipe( pipe_slow ); 10670 %} 10671 10672 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 10673 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10674 match(Set dst (XorV src1 src2)); 10675 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 10676 ins_encode %{ 10677 int vector_len = 0; 10678 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10679 %} 10680 ins_pipe( pipe_slow ); 10681 %} 10682 10683 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 10684 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10685 match(Set dst (XorV src (LoadVector mem))); 10686 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 10687 ins_encode %{ 10688 int vector_len = 0; 10689 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10690 %} 10691 ins_pipe( pipe_slow ); 10692 %} 10693 10694 instruct vxor16B(vecX dst, vecX src) %{ 10695 predicate(n->as_Vector()->length_in_bytes() == 16); 10696 match(Set dst (XorV dst src)); 10697 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 10698 ins_encode %{ 10699 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10700 %} 10701 ins_pipe( pipe_slow ); 10702 %} 10703 10704 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10705 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10706 match(Set dst (XorV src1 src2)); 10707 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 10708 ins_encode %{ 10709 int vector_len = 0; 10710 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10711 %} 10712 ins_pipe( pipe_slow ); 10713 %} 10714 10715 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 10716 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10717 match(Set dst (XorV src (LoadVector mem))); 10718 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 10719 ins_encode %{ 10720 int vector_len = 0; 10721 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10722 %} 10723 ins_pipe( pipe_slow ); 10724 %} 10725 10726 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 10727 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10728 match(Set dst (XorV src1 src2)); 10729 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 10730 ins_encode %{ 10731 int vector_len = 1; 10732 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10733 %} 10734 ins_pipe( pipe_slow ); 10735 %} 10736 10737 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 10738 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10739 match(Set dst (XorV src (LoadVector mem))); 10740 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 10741 ins_encode %{ 10742 int vector_len = 1; 10743 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10744 %} 10745 ins_pipe( pipe_slow ); 10746 %} 10747 10748 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10749 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10750 match(Set dst (XorV src1 src2)); 10751 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} 10752 ins_encode %{ 10753 int vector_len = 2; 10754 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10755 %} 10756 ins_pipe( pipe_slow ); 10757 %} 10758 10759 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ 10760 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10761 match(Set dst (XorV src (LoadVector mem))); 10762 format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %} 10763 ins_encode %{ 10764 int vector_len = 2; 10765 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10766 %} 10767 ins_pipe( pipe_slow ); 10768 %} 10769 10770 // --------------------------------- FMA -------------------------------------- 10771 10772 // a * b + c 10773 instruct vfma2D_reg(vecX a, vecX b, vecX c) %{ 10774 predicate(UseFMA && n->as_Vector()->length() == 2); 10775 match(Set c (FmaVD c (Binary a b))); 10776 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 10777 ins_cost(150); 10778 ins_encode %{ 10779 int vector_len = 0; 10780 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10781 %} 10782 ins_pipe( pipe_slow ); 10783 %} 10784 10785 // a * b + c 10786 instruct vfma2D_mem(vecX a, memory b, vecX c) %{ 10787 predicate(UseFMA && n->as_Vector()->length() == 2); 10788 match(Set c (FmaVD c (Binary a (LoadVector b)))); 10789 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 10790 ins_cost(150); 10791 ins_encode %{ 10792 int vector_len = 0; 10793 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 10794 %} 10795 ins_pipe( pipe_slow ); 10796 %} 10797 10798 10799 // a * b + c 10800 instruct vfma4D_reg(vecY a, vecY b, vecY c) %{ 10801 predicate(UseFMA && n->as_Vector()->length() == 4); 10802 match(Set c (FmaVD c (Binary a b))); 10803 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 10804 ins_cost(150); 10805 ins_encode %{ 10806 int vector_len = 1; 10807 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10808 %} 10809 ins_pipe( pipe_slow ); 10810 %} 10811 10812 // a * b + c 10813 instruct vfma4D_mem(vecY a, memory b, vecY c) %{ 10814 predicate(UseFMA && n->as_Vector()->length() == 4); 10815 match(Set c (FmaVD c (Binary a (LoadVector b)))); 10816 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 10817 ins_cost(150); 10818 ins_encode %{ 10819 int vector_len = 1; 10820 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 10821 %} 10822 ins_pipe( pipe_slow ); 10823 %} 10824 10825 // a * b + c 10826 instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{ 10827 predicate(UseFMA && n->as_Vector()->length() == 8); 10828 match(Set c (FmaVD c (Binary a b))); 10829 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 10830 ins_cost(150); 10831 ins_encode %{ 10832 int vector_len = 2; 10833 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10834 %} 10835 ins_pipe( pipe_slow ); 10836 %} 10837 10838 // a * b + c 10839 instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{ 10840 predicate(UseFMA && n->as_Vector()->length() == 8); 10841 match(Set c (FmaVD c (Binary a (LoadVector b)))); 10842 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 10843 ins_cost(150); 10844 ins_encode %{ 10845 int vector_len = 2; 10846 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 10847 %} 10848 ins_pipe( pipe_slow ); 10849 %} 10850 10851 // a * b + c 10852 instruct vfma4F_reg(vecX a, vecX b, vecX c) %{ 10853 predicate(UseFMA && n->as_Vector()->length() == 4); 10854 match(Set c (FmaVF c (Binary a b))); 10855 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} 10856 ins_cost(150); 10857 ins_encode %{ 10858 int vector_len = 0; 10859 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10860 %} 10861 ins_pipe( pipe_slow ); 10862 %} 10863 10864 // a * b + c 10865 instruct vfma4F_mem(vecX a, memory b, vecX c) %{ 10866 predicate(UseFMA && n->as_Vector()->length() == 4); 10867 match(Set c (FmaVF c (Binary a (LoadVector b)))); 10868 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} 10869 ins_cost(150); 10870 ins_encode %{ 10871 int vector_len = 0; 10872 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 10873 %} 10874 ins_pipe( pipe_slow ); 10875 %} 10876 10877 // a * b + c 10878 instruct vfma8F_reg(vecY a, vecY b, vecY c) %{ 10879 predicate(UseFMA && n->as_Vector()->length() == 8); 10880 match(Set c (FmaVF c (Binary a b))); 10881 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} 10882 ins_cost(150); 10883 ins_encode %{ 10884 int vector_len = 1; 10885 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10886 %} 10887 ins_pipe( pipe_slow ); 10888 %} 10889 10890 // a * b + c 10891 instruct vfma8F_mem(vecY a, memory b, vecY c) %{ 10892 predicate(UseFMA && n->as_Vector()->length() == 8); 10893 match(Set c (FmaVF c (Binary a (LoadVector b)))); 10894 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} 10895 ins_cost(150); 10896 ins_encode %{ 10897 int vector_len = 1; 10898 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 10899 %} 10900 ins_pipe( pipe_slow ); 10901 %} 10902 10903 // a * b + c 10904 instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{ 10905 predicate(UseFMA && n->as_Vector()->length() == 16); 10906 match(Set c (FmaVF c (Binary a b))); 10907 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} 10908 ins_cost(150); 10909 ins_encode %{ 10910 int vector_len = 2; 10911 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10912 %} 10913 ins_pipe( pipe_slow ); 10914 %} 10915 10916 // a * b + c 10917 instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{ 10918 predicate(UseFMA && n->as_Vector()->length() == 16); 10919 match(Set c (FmaVF c (Binary a (LoadVector b)))); 10920 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} 10921 ins_cost(150); 10922 ins_encode %{ 10923 int vector_len = 2; 10924 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 10925 %} 10926 ins_pipe( pipe_slow ); 10927 %} 10928 10929 // --------------------------------- PopCount -------------------------------------- 10930 10931 instruct vpopcount2I(vecD dst, vecD src) %{ 10932 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2); 10933 match(Set dst (PopCountVI src)); 10934 format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %} 10935 ins_encode %{ 10936 int vector_len = 0; 10937 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 10938 %} 10939 ins_pipe( pipe_slow ); 10940 %} 10941 10942 instruct vpopcount4I(vecX dst, vecX src) %{ 10943 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4); 10944 match(Set dst (PopCountVI src)); 10945 format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %} 10946 ins_encode %{ 10947 int vector_len = 0; 10948 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 10949 %} 10950 ins_pipe( pipe_slow ); 10951 %} 10952 10953 instruct vpopcount8I(vecY dst, vecY src) %{ 10954 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8); 10955 match(Set dst (PopCountVI src)); 10956 format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %} 10957 ins_encode %{ 10958 int vector_len = 1; 10959 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 10960 %} 10961 ins_pipe( pipe_slow ); 10962 %} 10963 10964 instruct vpopcount16I(vecZ dst, vecZ src) %{ 10965 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16); 10966 match(Set dst (PopCountVI src)); 10967 format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %} 10968 ins_encode %{ 10969 int vector_len = 2; 10970 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 10971 %} 10972 ins_pipe( pipe_slow ); 10973 %}