//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
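// Reading the definitions below: the first entry,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares the low 32-bit word of XMM0 as save-on-call under both the VM and
// the C calling convention, spilled as a float (Op_RegF), with hardware
// encoding 0, backed by the VMReg returned from xmm0->as_VMReg(). The b-p
// entries name the remaining fifteen 32-bit words of the same 512-bit
// register via ->next(1) through ->next(15).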
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
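// Note (descriptive, hedged): the alloc_class entries partition the registers
// defined above into chunks for the register allocator, while the reg_class
// entries further below name the subsets that operands and instructions may
// draw from (via ALLOC_IN_RC in operand definitions elsewhere in the port).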
// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

%}
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push "the_pc" on the stack without destroying any registers,
  // as they all may be live.
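  // There is no "push rip" instruction on x86-64, so a call to the next
  // instruction is used instead: the 5-byte "call next" pushes its return
  // address (the address of "next", i.e. the_pc + 5), and the subptr below
  // then subtracts the number of bytes emitted since the_pc (5 here),
  // leaving exactly the_pc in the stack slot without touching any
  // general-purpose register.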

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value;  // By default, match rules are supported.
}

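// For example (illustrative, per the switch above): on a CPU without
// SSE4.2, match_rule_supported(Op_StrIndexOf) returns false and C2 falls
// back to the generic implementation instead of the intrinsic; likewise
// Op_MulVL is rejected unless AVX-512DQ is available. The vector variant
// below additionally vets the vector length.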
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_* vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // By default, match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size, max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}
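// Worked example (illustrative values): with UseAVX == 2 and
// MaxVectorSize >= 32, vector_width_in_bytes(T_INT) is (1 << 2) * 8 = 32,
// so max_vector_size(T_INT) is 8 elements and vector_ideal_reg(32)
// returns Op_VecY (a 256-bit ymm register); with UseAVX == 3 the width
// doubles to 64 bytes and Op_VecZ (a zmm register) is used instead.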
// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector loads and stores.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow the Matcher to match the rule which bypasses the ConvI2L
    // operation for an array index on LP64 if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions.
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}
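// Illustrative effect of the cloning above (hypothetical IR shape): for a
// long-array access a[i] on LP64, the address subtree
//   AddP(base, AddP(base, LShiftL(ConvI2L(i), 3)), #16)
// is cloned into each memory user and subsumed by a single addressing
// mode such as [rbase + ridx*8 + 16], rather than being computed once
// into a scratch register.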
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so instructions
  // are emitted into a scratch buffer to determine their size.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves");
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as the SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so instructions
  // are emitted into a scratch buffer to determine their size.
1556 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1557 if (cbuf) { 1558 MacroAssembler _masm(cbuf); 1559 int offset = __ offset(); 1560 if (is_load) { 1561 switch (ireg) { 1562 case Op_VecS: 1563 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1564 break; 1565 case Op_VecD: 1566 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1567 break; 1568 case Op_VecX: 1569 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1570 break; 1571 case Op_VecY: 1572 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1573 break; 1574 case Op_VecZ: 1575 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1576 break; 1577 default: 1578 ShouldNotReachHere(); 1579 } 1580 } else { // store 1581 switch (ireg) { 1582 case Op_VecS: 1583 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1584 break; 1585 case Op_VecD: 1586 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1587 break; 1588 case Op_VecX: 1589 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1590 break; 1591 case Op_VecY: 1592 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1593 break; 1594 case Op_VecZ: 1595 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1596 break; 1597 default: 1598 ShouldNotReachHere(); 1599 } 1600 } 1601 int size = __ offset() - offset; 1602 #ifdef ASSERT 1603 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1604 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
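    // Worked size check (illustrative): the base encoding of an xmm
    // load/store from [rsp] is 5 bytes (prefix, opcode, ModRM, SIB);
    // stack_offset == 0 adds nothing, a disp8 (< 0x80) adds 1 byte, and a
    // disp32 adds 4, or 6 including the two extra EVEX prefix bytes when
    // UseAVX > 2, which is how offset_size above is computed.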
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while (bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
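  // Worked examples (illustrative values): replicate8_imm(0xAB, 1) yields
  // 0xABABABABABABABAB and replicate8_imm(0x1234, 2) yields
  // 0x1234123412341234. The masking step keeps a sign-extended input,
  // e.g. (byte)0x80 arriving as 0xFFFFFF80, from smearing into the
  // higher replicated lanes.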
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while (bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user-defined types which are used in
// instruction definitions.

// This operand applies only to EVEX, so there is only one version.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without the next two lines, with error:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
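    // The constants above follow the AVX vcmppd immediate predicate
    // encoding (e.g. 0x0 = EQ_OQ, 0x1 = LT_OS, 0xD = GE_OS, 0xE = GT_OS);
    // 0x20/0x21 below fall outside that 5-bit immediate space and exist
    // only to keep the ADLC parser happy, as the TODO above notes.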
1800 overflow (0x20, "o"); // not really supported by the instruction 1801 no_overflow (0x21, "no"); // not really supported by the instruction 1802 %} 1803 %} 1804 1805 1806 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1807 1808 // ============================================================================ 1809 1810 instruct ShouldNotReachHere() %{ 1811 match(Halt); 1812 format %{ "ud2\t# ShouldNotReachHere" %} 1813 ins_encode %{ 1814 __ ud2(); 1815 %} 1816 ins_pipe(pipe_slow); 1817 %} 1818 1819 // =================================EVEX special=============================== 1820 1821 instruct setMask(rRegI dst, rRegI src) %{ 1822 predicate(Matcher::has_predicated_vectors()); 1823 match(Set dst (SetVectMaskI src)); 1824 effect(TEMP dst); 1825 format %{ "setvectmask $dst, $src" %} 1826 ins_encode %{ 1827 __ setvectmask($dst$$Register, $src$$Register); 1828 %} 1829 ins_pipe(pipe_slow); 1830 %} 1831 1832 // ============================================================================ 1833 1834 instruct addF_reg(regF dst, regF src) %{ 1835 predicate((UseSSE>=1) && (UseAVX == 0)); 1836 match(Set dst (AddF dst src)); 1837 1838 format %{ "addss $dst, $src" %} 1839 ins_cost(150); 1840 ins_encode %{ 1841 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1842 %} 1843 ins_pipe(pipe_slow); 1844 %} 1845 1846 instruct addF_mem(regF dst, memory src) %{ 1847 predicate((UseSSE>=1) && (UseAVX == 0)); 1848 match(Set dst (AddF dst (LoadF src))); 1849 1850 format %{ "addss $dst, $src" %} 1851 ins_cost(150); 1852 ins_encode %{ 1853 __ addss($dst$$XMMRegister, $src$$Address); 1854 %} 1855 ins_pipe(pipe_slow); 1856 %} 1857 1858 instruct addF_imm(regF dst, immF con) %{ 1859 predicate((UseSSE>=1) && (UseAVX == 0)); 1860 match(Set dst (AddF dst con)); 1861 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1862 ins_cost(150); 1863 ins_encode %{ 1864 __ addss($dst$$XMMRegister, $constantaddress($con)); 1865 %} 1866 ins_pipe(pipe_slow); 1867 %} 1868 1869 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1870 predicate(UseAVX > 0); 1871 match(Set dst (AddF src1 src2)); 1872 1873 format %{ "vaddss $dst, $src1, $src2" %} 1874 ins_cost(150); 1875 ins_encode %{ 1876 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1877 %} 1878 ins_pipe(pipe_slow); 1879 %} 1880 1881 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1882 predicate(UseAVX > 0); 1883 match(Set dst (AddF src1 (LoadF src2))); 1884 1885 format %{ "vaddss $dst, $src1, $src2" %} 1886 ins_cost(150); 1887 ins_encode %{ 1888 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1889 %} 1890 ins_pipe(pipe_slow); 1891 %} 1892 1893 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1894 predicate(UseAVX > 0); 1895 match(Set dst (AddF src con)); 1896 1897 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1898 ins_cost(150); 1899 ins_encode %{ 1900 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1901 %} 1902 ins_pipe(pipe_slow); 1903 %} 1904 1905 instruct addD_reg(regD dst, regD src) %{ 1906 predicate((UseSSE>=2) && (UseAVX == 0)); 1907 match(Set dst (AddD dst src)); 1908 1909 format %{ "addsd $dst, $src" %} 1910 ins_cost(150); 1911 ins_encode %{ 1912 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1913 %} 1914 ins_pipe(pipe_slow); 1915 %} 1916 1917 instruct addD_mem(regD dst, memory src) %{ 1918 predicate((UseSSE>=2) && (UseAVX == 0)); 1919 match(Set dst (AddD dst (LoadD src))); 1920 1921 
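  // The (LoadD src) in the match rule above folds the memory load into
  // addsd's second operand (e.g. "addsd xmm0, [rsi + 16]", operands
  // illustrative), saving a separate movsd and a register; the other
  // *_mem variants in this section follow the same pattern.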
format %{ "addsd $dst, $src" %} 1922 ins_cost(150); 1923 ins_encode %{ 1924 __ addsd($dst$$XMMRegister, $src$$Address); 1925 %} 1926 ins_pipe(pipe_slow); 1927 %} 1928 1929 instruct addD_imm(regD dst, immD con) %{ 1930 predicate((UseSSE>=2) && (UseAVX == 0)); 1931 match(Set dst (AddD dst con)); 1932 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1933 ins_cost(150); 1934 ins_encode %{ 1935 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1936 %} 1937 ins_pipe(pipe_slow); 1938 %} 1939 1940 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1941 predicate(UseAVX > 0); 1942 match(Set dst (AddD src1 src2)); 1943 1944 format %{ "vaddsd $dst, $src1, $src2" %} 1945 ins_cost(150); 1946 ins_encode %{ 1947 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1948 %} 1949 ins_pipe(pipe_slow); 1950 %} 1951 1952 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1953 predicate(UseAVX > 0); 1954 match(Set dst (AddD src1 (LoadD src2))); 1955 1956 format %{ "vaddsd $dst, $src1, $src2" %} 1957 ins_cost(150); 1958 ins_encode %{ 1959 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1960 %} 1961 ins_pipe(pipe_slow); 1962 %} 1963 1964 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1965 predicate(UseAVX > 0); 1966 match(Set dst (AddD src con)); 1967 1968 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1969 ins_cost(150); 1970 ins_encode %{ 1971 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1972 %} 1973 ins_pipe(pipe_slow); 1974 %} 1975 1976 instruct subF_reg(regF dst, regF src) %{ 1977 predicate((UseSSE>=1) && (UseAVX == 0)); 1978 match(Set dst (SubF dst src)); 1979 1980 format %{ "subss $dst, $src" %} 1981 ins_cost(150); 1982 ins_encode %{ 1983 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1984 %} 1985 ins_pipe(pipe_slow); 1986 %} 1987 1988 instruct subF_mem(regF dst, memory src) %{ 1989 predicate((UseSSE>=1) && (UseAVX == 0)); 1990 match(Set dst (SubF dst (LoadF src))); 1991 1992 format %{ "subss $dst, $src" %} 1993 ins_cost(150); 1994 ins_encode %{ 1995 __ subss($dst$$XMMRegister, $src$$Address); 1996 %} 1997 ins_pipe(pipe_slow); 1998 %} 1999 2000 instruct subF_imm(regF dst, immF con) %{ 2001 predicate((UseSSE>=1) && (UseAVX == 0)); 2002 match(Set dst (SubF dst con)); 2003 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2004 ins_cost(150); 2005 ins_encode %{ 2006 __ subss($dst$$XMMRegister, $constantaddress($con)); 2007 %} 2008 ins_pipe(pipe_slow); 2009 %} 2010 2011 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2012 predicate(UseAVX > 0); 2013 match(Set dst (SubF src1 src2)); 2014 2015 format %{ "vsubss $dst, $src1, $src2" %} 2016 ins_cost(150); 2017 ins_encode %{ 2018 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2019 %} 2020 ins_pipe(pipe_slow); 2021 %} 2022 2023 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2024 predicate(UseAVX > 0); 2025 match(Set dst (SubF src1 (LoadF src2))); 2026 2027 format %{ "vsubss $dst, $src1, $src2" %} 2028 ins_cost(150); 2029 ins_encode %{ 2030 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2031 %} 2032 ins_pipe(pipe_slow); 2033 %} 2034 2035 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2036 predicate(UseAVX > 0); 2037 match(Set dst (SubF src con)); 2038 2039 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2040 ins_cost(150); 2041 ins_encode %{ 2042 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2043 %} 2044 ins_pipe(pipe_slow); 2045 %} 2046 2047 instruct subD_reg(regD dst, regD src) %{ 2048 predicate((UseSSE>=2) && (UseAVX == 0)); 2049 match(Set dst (SubD dst src)); 2050 2051 format %{ "subsd $dst, $src" %} 2052 ins_cost(150); 2053 ins_encode %{ 2054 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2055 %} 2056 ins_pipe(pipe_slow); 2057 %} 2058 2059 instruct subD_mem(regD dst, memory src) %{ 2060 predicate((UseSSE>=2) && (UseAVX == 0)); 2061 match(Set dst (SubD dst (LoadD src))); 2062 2063 format %{ "subsd $dst, $src" %} 2064 ins_cost(150); 2065 ins_encode %{ 2066 __ subsd($dst$$XMMRegister, $src$$Address); 2067 %} 2068 ins_pipe(pipe_slow); 2069 %} 2070 2071 instruct subD_imm(regD dst, immD con) %{ 2072 predicate((UseSSE>=2) && (UseAVX == 0)); 2073 match(Set dst (SubD dst con)); 2074 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2075 ins_cost(150); 2076 ins_encode %{ 2077 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2078 %} 2079 ins_pipe(pipe_slow); 2080 %} 2081 2082 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2083 predicate(UseAVX > 0); 2084 match(Set dst (SubD src1 src2)); 2085 2086 format %{ "vsubsd $dst, $src1, $src2" %} 2087 ins_cost(150); 2088 ins_encode %{ 2089 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2090 %} 2091 ins_pipe(pipe_slow); 2092 %} 2093 2094 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2095 predicate(UseAVX > 0); 2096 match(Set dst (SubD src1 (LoadD src2))); 2097 2098 format %{ "vsubsd $dst, $src1, $src2" %} 2099 ins_cost(150); 2100 ins_encode %{ 2101 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2102 %} 2103 ins_pipe(pipe_slow); 2104 %} 2105 2106 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2107 predicate(UseAVX > 0); 2108 match(Set dst (SubD src con)); 2109 2110 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2111 ins_cost(150); 2112 ins_encode %{ 2113 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2114 %} 2115 ins_pipe(pipe_slow); 2116 %} 2117 2118 instruct mulF_reg(regF dst, regF src) %{ 2119 predicate((UseSSE>=1) && (UseAVX == 0)); 2120 match(Set dst (MulF dst src)); 2121 2122 format %{ "mulss $dst, $src" %} 2123 ins_cost(150); 2124 ins_encode %{ 2125 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2126 %} 2127 ins_pipe(pipe_slow); 2128 %} 2129 2130 instruct mulF_mem(regF dst, memory src) %{ 2131 predicate((UseSSE>=1) && (UseAVX == 0)); 2132 match(Set dst (MulF dst (LoadF src))); 2133 2134 format %{ "mulss $dst, $src" %} 2135 ins_cost(150); 2136 ins_encode %{ 2137 __ mulss($dst$$XMMRegister, $src$$Address); 2138 %} 2139 ins_pipe(pipe_slow); 2140 %} 2141 2142 instruct mulF_imm(regF dst, immF con) %{ 2143 predicate((UseSSE>=1) && (UseAVX == 0)); 2144 match(Set dst (MulF dst con)); 2145 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2146 ins_cost(150); 2147 ins_encode %{ 2148 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2149 %} 2150 ins_pipe(pipe_slow); 2151 %} 2152 2153 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2154 predicate(UseAVX > 0); 2155 match(Set dst (MulF src1 src2)); 2156 2157 format %{ "vmulss $dst, $src1, $src2" %} 2158 ins_cost(150); 2159 ins_encode %{ 2160 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2161 %} 2162 ins_pipe(pipe_slow); 2163 %} 2164 2165 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2166 predicate(UseAVX > 0); 2167 match(Set dst (MulF src1 (LoadF src2))); 2168 2169 format %{ "vmulss $dst, $src1, $src2" %} 2170 ins_cost(150); 2171 ins_encode %{ 2172 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2173 %} 2174 ins_pipe(pipe_slow); 2175 %} 2176 2177 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2178 predicate(UseAVX > 0); 2179 match(Set dst (MulF src con)); 2180 2181 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2182 ins_cost(150); 2183 ins_encode %{ 2184 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2185 %} 2186 ins_pipe(pipe_slow); 2187 %} 2188 2189 instruct mulD_reg(regD dst, regD src) %{ 2190 predicate((UseSSE>=2) && (UseAVX == 0)); 2191 match(Set dst (MulD dst src)); 2192 2193 format %{ "mulsd $dst, $src" %} 2194 ins_cost(150); 2195 ins_encode %{ 2196 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2197 %} 2198 ins_pipe(pipe_slow); 2199 %} 2200 2201 instruct mulD_mem(regD dst, memory src) %{ 2202 predicate((UseSSE>=2) && (UseAVX == 0)); 2203 match(Set dst (MulD dst (LoadD src))); 2204 2205 format %{ "mulsd $dst, $src" %} 2206 ins_cost(150); 2207 ins_encode %{ 2208 __ mulsd($dst$$XMMRegister, $src$$Address); 2209 %} 2210 ins_pipe(pipe_slow); 2211 %} 2212 2213 instruct mulD_imm(regD dst, immD con) %{ 2214 predicate((UseSSE>=2) && (UseAVX == 0)); 2215 match(Set dst (MulD dst con)); 2216 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2217 ins_cost(150); 2218 ins_encode %{ 2219 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2220 %} 2221 ins_pipe(pipe_slow); 2222 %} 2223 2224 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2225 predicate(UseAVX > 0); 2226 match(Set dst (MulD src1 src2)); 2227 2228 format %{ "vmulsd $dst, $src1, $src2" %} 2229 ins_cost(150); 2230 ins_encode %{ 2231 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2232 %} 2233 ins_pipe(pipe_slow); 2234 %} 2235 2236 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2237 predicate(UseAVX > 0); 2238 match(Set dst (MulD src1 (LoadD src2))); 2239 2240 format %{ "vmulsd $dst, $src1, $src2" %} 2241 ins_cost(150); 2242 ins_encode %{ 2243 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2244 %} 2245 ins_pipe(pipe_slow); 2246 %} 2247 2248 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2249 predicate(UseAVX > 0); 2250 match(Set dst (MulD src con)); 2251 2252 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2253 ins_cost(150); 2254 ins_encode %{ 2255 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2256 %} 2257 ins_pipe(pipe_slow); 2258 %} 2259 2260 instruct divF_reg(regF dst, regF src) %{ 2261 predicate((UseSSE>=1) && (UseAVX == 0)); 2262 match(Set dst (DivF dst src)); 2263 2264 format %{ "divss $dst, $src" %} 2265 ins_cost(150); 2266 ins_encode %{ 2267 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2268 %} 2269 ins_pipe(pipe_slow); 2270 %} 2271 2272 instruct divF_mem(regF dst, memory src) %{ 2273 predicate((UseSSE>=1) && (UseAVX == 0)); 2274 match(Set dst (DivF dst (LoadF src))); 2275 2276 format %{ "divss $dst, $src" %} 2277 ins_cost(150); 2278 ins_encode %{ 2279 __ divss($dst$$XMMRegister, $src$$Address); 2280 %} 2281 ins_pipe(pipe_slow); 2282 %} 2283 2284 instruct divF_imm(regF dst, immF con) %{ 2285 predicate((UseSSE>=1) && (UseAVX == 0)); 2286 match(Set dst (DivF dst con)); 2287 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2288 ins_cost(150); 2289 ins_encode %{ 2290 __ divss($dst$$XMMRegister, $constantaddress($con)); 2291 %} 2292 ins_pipe(pipe_slow); 2293 %} 2294 2295 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2296 predicate(UseAVX > 0); 2297 match(Set dst (DivF src1 src2)); 2298 2299 format %{ "vdivss $dst, $src1, $src2" %} 2300 ins_cost(150); 2301 ins_encode %{ 2302 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2303 %} 2304 ins_pipe(pipe_slow); 2305 %} 2306 2307 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2308 predicate(UseAVX > 0); 2309 match(Set dst (DivF src1 (LoadF src2))); 2310 2311 format %{ "vdivss $dst, $src1, $src2" %} 2312 ins_cost(150); 2313 ins_encode %{ 2314 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2315 %} 2316 ins_pipe(pipe_slow); 2317 %} 2318 2319 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2320 predicate(UseAVX > 0); 2321 match(Set dst (DivF src con)); 2322 2323 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2324 ins_cost(150); 2325 ins_encode %{ 2326 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2327 %} 2328 ins_pipe(pipe_slow); 2329 %} 2330 2331 instruct divD_reg(regD dst, regD src) %{ 2332 predicate((UseSSE>=2) && (UseAVX == 0)); 2333 match(Set dst (DivD dst src)); 2334 2335 format %{ "divsd $dst, $src" %} 2336 ins_cost(150); 2337 ins_encode %{ 2338 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2339 %} 2340 ins_pipe(pipe_slow); 2341 %} 2342 2343 instruct divD_mem(regD dst, memory src) %{ 2344 predicate((UseSSE>=2) && (UseAVX == 0)); 2345 match(Set dst (DivD dst (LoadD src))); 2346 2347 format %{ "divsd $dst, $src" %} 2348 ins_cost(150); 2349 ins_encode %{ 2350 __ divsd($dst$$XMMRegister, $src$$Address); 2351 %} 2352 ins_pipe(pipe_slow); 2353 %} 2354 2355 instruct divD_imm(regD dst, immD con) %{ 2356 predicate((UseSSE>=2) && (UseAVX == 0)); 2357 match(Set dst (DivD dst con)); 2358 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2359 ins_cost(150); 2360 ins_encode %{ 2361 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2362 %} 2363 ins_pipe(pipe_slow); 2364 %} 2365 2366 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2367 predicate(UseAVX > 0); 2368 match(Set dst (DivD src1 src2)); 2369 2370 format %{ "vdivsd $dst, $src1, $src2" %} 2371 ins_cost(150); 2372 ins_encode %{ 2373 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2374 %} 2375 ins_pipe(pipe_slow); 2376 %} 2377 2378 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2379 predicate(UseAVX > 0); 2380 match(Set dst (DivD src1 (LoadD src2))); 2381 2382 format %{ "vdivsd $dst, $src1, $src2" %} 2383 ins_cost(150); 2384 ins_encode %{ 2385 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2386 %} 2387 ins_pipe(pipe_slow); 2388 %} 2389 2390 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2391 predicate(UseAVX > 0); 2392 match(Set dst (DivD src con)); 2393 2394 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2395 ins_cost(150); 2396 ins_encode %{ 2397 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2398 %} 2399 ins_pipe(pipe_slow); 2400 %} 2401 2402 instruct absF_reg(regF dst) %{ 2403 predicate((UseSSE>=1) && (UseAVX == 0)); 2404 match(Set dst (AbsF dst)); 2405 ins_cost(150); 2406 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
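// Why sign masking works (illustrative bit patterns): IEEE-754 stores the
// sign in the top bit, so ANDing with 0x7fffffff (float) or
// 0x7fffffffffffffff (double) clears it and changes nothing else, e.g.
// -42.0f = 0xc2280000 becomes 42.0f = 0x42280000. The sign-flip masks
// 0x80000000 / 0x8000000000000000 used by the Neg instructions further
// below XOR that same bit instead.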
instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst,
$src" %} 2639 ins_cost(150); 2640 ins_encode %{ 2641 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2642 %} 2643 ins_pipe(pipe_slow); 2644 %} 2645 2646 instruct sqrtD_imm(regD dst, immD con) %{ 2647 predicate(UseSSE>=2); 2648 match(Set dst (SqrtD con)); 2649 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2650 ins_cost(150); 2651 ins_encode %{ 2652 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2653 %} 2654 ins_pipe(pipe_slow); 2655 %} 2656 2657 instruct onspinwait() %{ 2658 match(OnSpinWait); 2659 ins_cost(200); 2660 2661 format %{ 2662 $$template 2663 if (os::is_MP()) { 2664 $$emit$$"pause\t! membar_onspinwait" 2665 } else { 2666 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2667 } 2668 %} 2669 ins_encode %{ 2670 __ pause(); 2671 %} 2672 ins_pipe(pipe_slow); 2673 %} 2674 2675 // a * b + c 2676 instruct fmaD_reg(regD a, regD b, regD c) %{ 2677 predicate(UseFMA); 2678 match(Set c (FmaD c (Binary a b))); 2679 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2680 ins_cost(150); 2681 ins_encode %{ 2682 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2683 %} 2684 ins_pipe( pipe_slow ); 2685 %} 2686 2687 // a * b + c 2688 instruct fmaF_reg(regF a, regF b, regF c) %{ 2689 predicate(UseFMA); 2690 match(Set c (FmaF c (Binary a b))); 2691 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2692 ins_cost(150); 2693 ins_encode %{ 2694 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2695 %} 2696 ins_pipe( pipe_slow ); 2697 %} 2698 2699 // ====================VECTOR INSTRUCTIONS===================================== 2700 2701 // Load vectors (4 bytes long) 2702 instruct loadV4(vecS dst, memory mem) %{ 2703 predicate(n->as_LoadVector()->memory_size() == 4); 2704 match(Set dst (LoadVector mem)); 2705 ins_cost(125); 2706 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2707 ins_encode %{ 2708 __ movdl($dst$$XMMRegister, $mem$$Address); 2709 %} 2710 ins_pipe( pipe_slow ); 2711 %} 2712 2713 // Load vectors (8 bytes long) 2714 instruct loadV8(vecD dst, memory mem) %{ 2715 predicate(n->as_LoadVector()->memory_size() == 8); 2716 match(Set dst (LoadVector mem)); 2717 ins_cost(125); 2718 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2719 ins_encode %{ 2720 __ movq($dst$$XMMRegister, $mem$$Address); 2721 %} 2722 ins_pipe( pipe_slow ); 2723 %} 2724 2725 // Load vectors (16 bytes long) 2726 instruct loadV16(vecX dst, memory mem) %{ 2727 predicate(n->as_LoadVector()->memory_size() == 16); 2728 match(Set dst (LoadVector mem)); 2729 ins_cost(125); 2730 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2731 ins_encode %{ 2732 __ movdqu($dst$$XMMRegister, $mem$$Address); 2733 %} 2734 ins_pipe( pipe_slow ); 2735 %} 2736 2737 // Load vectors (32 bytes long) 2738 instruct loadV32(vecY dst, memory mem) %{ 2739 predicate(n->as_LoadVector()->memory_size() == 32); 2740 match(Set dst (LoadVector mem)); 2741 ins_cost(125); 2742 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2743 ins_encode %{ 2744 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2745 %} 2746 ins_pipe( pipe_slow ); 2747 %} 2748 2749 // Load vectors (64 bytes long) 2750 instruct loadV64_dword(vecZ dst, memory mem) %{ 2751 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 2752 match(Set dst (LoadVector mem)); 2753 ins_cost(125); 2754 format %{ "vmovdqul $dst k0,$mem\t! 
load vector (64 bytes)" %} 2755 ins_encode %{ 2756 int vector_len = 2; 2757 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 2758 %} 2759 ins_pipe( pipe_slow ); 2760 %} 2761 2762 // Load vectors (64 bytes long) 2763 instruct loadV64_qword(vecZ dst, memory mem) %{ 2764 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 2765 match(Set dst (LoadVector mem)); 2766 ins_cost(125); 2767 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 2768 ins_encode %{ 2769 int vector_len = 2; 2770 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 2771 %} 2772 ins_pipe( pipe_slow ); 2773 %} 2774 2775 // Store vectors 2776 instruct storeV4(memory mem, vecS src) %{ 2777 predicate(n->as_StoreVector()->memory_size() == 4); 2778 match(Set mem (StoreVector mem src)); 2779 ins_cost(145); 2780 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2781 ins_encode %{ 2782 __ movdl($mem$$Address, $src$$XMMRegister); 2783 %} 2784 ins_pipe( pipe_slow ); 2785 %} 2786 2787 instruct storeV8(memory mem, vecD src) %{ 2788 predicate(n->as_StoreVector()->memory_size() == 8); 2789 match(Set mem (StoreVector mem src)); 2790 ins_cost(145); 2791 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 2792 ins_encode %{ 2793 __ movq($mem$$Address, $src$$XMMRegister); 2794 %} 2795 ins_pipe( pipe_slow ); 2796 %} 2797 2798 instruct storeV16(memory mem, vecX src) %{ 2799 predicate(n->as_StoreVector()->memory_size() == 16); 2800 match(Set mem (StoreVector mem src)); 2801 ins_cost(145); 2802 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 2803 ins_encode %{ 2804 __ movdqu($mem$$Address, $src$$XMMRegister); 2805 %} 2806 ins_pipe( pipe_slow ); 2807 %} 2808 2809 instruct storeV32(memory mem, vecY src) %{ 2810 predicate(n->as_StoreVector()->memory_size() == 32); 2811 match(Set mem (StoreVector mem src)); 2812 ins_cost(145); 2813 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2814 ins_encode %{ 2815 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2816 %} 2817 ins_pipe( pipe_slow ); 2818 %} 2819 2820 instruct storeV64_dword(memory mem, vecZ src) %{ 2821 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 2822 match(Set mem (StoreVector mem src)); 2823 ins_cost(145); 2824 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 2825 ins_encode %{ 2826 int vector_len = 2; 2827 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 2828 %} 2829 ins_pipe( pipe_slow ); 2830 %} 2831 2832 instruct storeV64_qword(memory mem, vecZ src) %{ 2833 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 2834 match(Set mem (StoreVector mem src)); 2835 ins_cost(145); 2836 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 2837 ins_encode %{ 2838 int vector_len = 2; 2839 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 2840 %} 2841 ins_pipe( pipe_slow ); 2842 %} 2843 2844 // ====================LEGACY REPLICATE======================================= 2845 2846 instruct Repl4B_mem(vecS dst, memory mem) %{ 2847 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2848 match(Set dst (ReplicateB (LoadB mem))); 2849 format %{ "punpcklbw $dst,$mem\n\t" 2850 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 2851 ins_encode %{ 2852 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2853 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2854 %} 2855 ins_pipe( pipe_slow ); 2856 %} 2857 2858 instruct Repl8B_mem(vecD dst, memory mem) %{ 2859 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2860 match(Set dst (ReplicateB (LoadB mem))); 2861 format %{ "punpcklbw $dst,$mem\n\t" 2862 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 2863 ins_encode %{ 2864 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2865 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2866 %} 2867 ins_pipe( pipe_slow ); 2868 %} 2869 2870 instruct Repl16B(vecX dst, rRegI src) %{ 2871 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2872 match(Set dst (ReplicateB src)); 2873 format %{ "movd $dst,$src\n\t" 2874 "punpcklbw $dst,$dst\n\t" 2875 "pshuflw $dst,$dst,0x00\n\t" 2876 "punpcklqdq $dst,$dst\t! replicate16B" %} 2877 ins_encode %{ 2878 __ movdl($dst$$XMMRegister, $src$$Register); 2879 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2880 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2881 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2882 %} 2883 ins_pipe( pipe_slow ); 2884 %} 2885 2886 instruct Repl16B_mem(vecX dst, memory mem) %{ 2887 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2888 match(Set dst (ReplicateB (LoadB mem))); 2889 format %{ "punpcklbw $dst,$mem\n\t" 2890 "pshuflw $dst,$dst,0x00\n\t" 2891 "punpcklqdq $dst,$dst\t! replicate16B" %} 2892 ins_encode %{ 2893 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2894 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2895 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2896 %} 2897 ins_pipe( pipe_slow ); 2898 %} 2899 2900 instruct Repl32B(vecY dst, rRegI src) %{ 2901 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2902 match(Set dst (ReplicateB src)); 2903 format %{ "movd $dst,$src\n\t" 2904 "punpcklbw $dst,$dst\n\t" 2905 "pshuflw $dst,$dst,0x00\n\t" 2906 "punpcklqdq $dst,$dst\n\t" 2907 "vinserti128_high $dst,$dst\t! replicate32B" %} 2908 ins_encode %{ 2909 __ movdl($dst$$XMMRegister, $src$$Register); 2910 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2911 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2912 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2913 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2914 %} 2915 ins_pipe( pipe_slow ); 2916 %} 2917 2918 instruct Repl32B_mem(vecY dst, memory mem) %{ 2919 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2920 match(Set dst (ReplicateB (LoadB mem))); 2921 format %{ "punpcklbw $dst,$mem\n\t" 2922 "pshuflw $dst,$dst,0x00\n\t" 2923 "punpcklqdq $dst,$dst\n\t" 2924 "vinserti128_high $dst,$dst\t! replicate32B" %} 2925 ins_encode %{ 2926 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2927 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2928 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2929 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2930 %} 2931 ins_pipe( pipe_slow ); 2932 %} 2933 2934 instruct Repl16B_imm(vecX dst, immI con) %{ 2935 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2936 match(Set dst (ReplicateB con)); 2937 format %{ "movq $dst,[$constantaddress]\n\t" 2938 "punpcklqdq $dst,$dst\t! 
replicate16B($con)" %} 2939 ins_encode %{ 2940 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 2941 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2942 %} 2943 ins_pipe( pipe_slow ); 2944 %} 2945 2946 instruct Repl32B_imm(vecY dst, immI con) %{ 2947 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2948 match(Set dst (ReplicateB con)); 2949 format %{ "movq $dst,[$constantaddress]\n\t" 2950 "punpcklqdq $dst,$dst\n\t" 2951 "vinserti128_high $dst,$dst\t! replicate32B($con)" %} 2952 ins_encode %{ 2953 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 2954 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2955 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2956 %} 2957 ins_pipe( pipe_slow ); 2958 %} 2959 2960 instruct Repl4S(vecD dst, rRegI src) %{ 2961 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 2962 match(Set dst (ReplicateS src)); 2963 format %{ "movd $dst,$src\n\t" 2964 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 2965 ins_encode %{ 2966 __ movdl($dst$$XMMRegister, $src$$Register); 2967 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2968 %} 2969 ins_pipe( pipe_slow ); 2970 %} 2971 2972 instruct Repl4S_mem(vecD dst, memory mem) %{ 2973 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2974 match(Set dst (ReplicateS (LoadS mem))); 2975 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 2976 ins_encode %{ 2977 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 2978 %} 2979 ins_pipe( pipe_slow ); 2980 %} 2981 2982 instruct Repl8S(vecX dst, rRegI src) %{ 2983 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 2984 match(Set dst (ReplicateS src)); 2985 format %{ "movd $dst,$src\n\t" 2986 "pshuflw $dst,$dst,0x00\n\t" 2987 "punpcklqdq $dst,$dst\t! replicate8S" %} 2988 ins_encode %{ 2989 __ movdl($dst$$XMMRegister, $src$$Register); 2990 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2991 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2992 %} 2993 ins_pipe( pipe_slow ); 2994 %} 2995 2996 instruct Repl8S_mem(vecX dst, memory mem) %{ 2997 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2998 match(Set dst (ReplicateS (LoadS mem))); 2999 format %{ "pshuflw $dst,$mem,0x00\n\t" 3000 "punpcklqdq $dst,$dst\t! replicate8S" %} 3001 ins_encode %{ 3002 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3003 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3004 %} 3005 ins_pipe( pipe_slow ); 3006 %} 3007 3008 instruct Repl8S_imm(vecX dst, immI con) %{ 3009 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3010 match(Set dst (ReplicateS con)); 3011 format %{ "movq $dst,[$constantaddress]\n\t" 3012 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3013 ins_encode %{ 3014 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3015 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3016 %} 3017 ins_pipe( pipe_slow ); 3018 %} 3019 3020 instruct Repl16S(vecY dst, rRegI src) %{ 3021 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3022 match(Set dst (ReplicateS src)); 3023 format %{ "movd $dst,$src\n\t" 3024 "pshuflw $dst,$dst,0x00\n\t" 3025 "punpcklqdq $dst,$dst\n\t" 3026 "vinserti128_high $dst,$dst\t!
replicate16S" %} 3027 ins_encode %{ 3028 __ movdl($dst$$XMMRegister, $src$$Register); 3029 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3030 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3031 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3032 %} 3033 ins_pipe( pipe_slow ); 3034 %} 3035 3036 instruct Repl16S_mem(vecY dst, memory mem) %{ 3037 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3038 match(Set dst (ReplicateS (LoadS mem))); 3039 format %{ "pshuflw $dst,$mem,0x00\n\t" 3040 "punpcklqdq $dst,$dst\n\t" 3041 "vinserti128_high $dst,$dst\t! replicate16S" %} 3042 ins_encode %{ 3043 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3044 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3045 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3046 %} 3047 ins_pipe( pipe_slow ); 3048 %} 3049 3050 instruct Repl16S_imm(vecY dst, immI con) %{ 3051 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3052 match(Set dst (ReplicateS con)); 3053 format %{ "movq $dst,[$constantaddress]\n\t" 3054 "punpcklqdq $dst,$dst\n\t" 3055 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3056 ins_encode %{ 3057 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3058 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3059 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3060 %} 3061 ins_pipe( pipe_slow ); 3062 %} 3063 3064 instruct Repl4I(vecX dst, rRegI src) %{ 3065 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3066 match(Set dst (ReplicateI src)); 3067 format %{ "movd $dst,$src\n\t" 3068 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3069 ins_encode %{ 3070 __ movdl($dst$$XMMRegister, $src$$Register); 3071 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3072 %} 3073 ins_pipe( pipe_slow ); 3074 %} 3075 3076 instruct Repl4I_mem(vecX dst, memory mem) %{ 3077 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3078 match(Set dst (ReplicateI (LoadI mem))); 3079 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3080 ins_encode %{ 3081 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3082 %} 3083 ins_pipe( pipe_slow ); 3084 %} 3085 3086 instruct Repl8I(vecY dst, rRegI src) %{ 3087 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3088 match(Set dst (ReplicateI src)); 3089 format %{ "movd $dst,$src\n\t" 3090 "pshufd $dst,$dst,0x00\n\t" 3091 "vinserti128_high $dst,$dst\t! replicate8I" %} 3092 ins_encode %{ 3093 __ movdl($dst$$XMMRegister, $src$$Register); 3094 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3095 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3096 %} 3097 ins_pipe( pipe_slow ); 3098 %} 3099 3100 instruct Repl8I_mem(vecY dst, memory mem) %{ 3101 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3102 match(Set dst (ReplicateI (LoadI mem))); 3103 format %{ "pshufd $dst,$mem,0x00\n\t" 3104 "vinserti128_high $dst,$dst\t! replicate8I" %} 3105 ins_encode %{ 3106 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3107 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3108 %} 3109 ins_pipe( pipe_slow ); 3110 %} 3111 3112 instruct Repl4I_imm(vecX dst, immI con) %{ 3113 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3114 match(Set dst (ReplicateI con)); 3115 format %{ "movq $dst,[$constantaddress]\t! 
replicate4I($con)\n\t" 3116 "punpcklqdq $dst,$dst" %} 3117 ins_encode %{ 3118 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3119 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3120 %} 3121 ins_pipe( pipe_slow ); 3122 %} 3123 3124 instruct Repl8I_imm(vecY dst, immI con) %{ 3125 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3126 match(Set dst (ReplicateI con)); 3127 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3128 "punpcklqdq $dst,$dst\n\t" 3129 "vinserti128_high $dst,$dst" %} 3130 ins_encode %{ 3131 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3132 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3133 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3134 %} 3135 ins_pipe( pipe_slow ); 3136 %} 3137 3138 // Long could be loaded into xmm register directly from memory. 3139 instruct Repl2L_mem(vecX dst, memory mem) %{ 3140 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3141 match(Set dst (ReplicateL (LoadL mem))); 3142 format %{ "movq $dst,$mem\n\t" 3143 "punpcklqdq $dst,$dst\t! replicate2L" %} 3144 ins_encode %{ 3145 __ movq($dst$$XMMRegister, $mem$$Address); 3146 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3147 %} 3148 ins_pipe( pipe_slow ); 3149 %} 3150 3151 // Replicate long (8 byte) scalar to be vector 3152 #ifdef _LP64 3153 instruct Repl4L(vecY dst, rRegL src) %{ 3154 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3155 match(Set dst (ReplicateL src)); 3156 format %{ "movdq $dst,$src\n\t" 3157 "punpcklqdq $dst,$dst\n\t" 3158 "vinserti128_high $dst,$dst\t! replicate4L" %} 3159 ins_encode %{ 3160 __ movdq($dst$$XMMRegister, $src$$Register); 3161 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3162 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3163 %} 3164 ins_pipe( pipe_slow ); 3165 %} 3166 #else // _LP64 3167 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3168 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3169 match(Set dst (ReplicateL src)); 3170 effect(TEMP dst, USE src, TEMP tmp); 3171 format %{ "movdl $dst,$src.lo\n\t" 3172 "movdl $tmp,$src.hi\n\t" 3173 "punpckldq $dst,$tmp\n\t" 3174 "punpcklqdq $dst,$dst\n\t" 3175 "vinserti128_high $dst,$dst\t! replicate4L" %} 3176 ins_encode %{ 3177 __ movdl($dst$$XMMRegister, $src$$Register); 3178 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3179 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3180 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3181 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3182 %} 3183 ins_pipe( pipe_slow ); 3184 %} 3185 #endif // _LP64 3186 3187 instruct Repl4L_imm(vecY dst, immL con) %{ 3188 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3189 match(Set dst (ReplicateL con)); 3190 format %{ "movq $dst,[$constantaddress]\n\t" 3191 "punpcklqdq $dst,$dst\n\t" 3192 "vinserti128_high $dst,$dst\t! 
replicate4L($con)" %} 3193 ins_encode %{ 3194 __ movq($dst$$XMMRegister, $constantaddress($con)); 3195 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3196 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3197 %} 3198 ins_pipe( pipe_slow ); 3199 %} 3200 3201 instruct Repl4L_mem(vecY dst, memory mem) %{ 3202 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3203 match(Set dst (ReplicateL (LoadL mem))); 3204 format %{ "movq $dst,$mem\n\t" 3205 "punpcklqdq $dst,$dst\n\t" 3206 "vinserti128_high $dst,$dst\t! replicate4L" %} 3207 ins_encode %{ 3208 __ movq($dst$$XMMRegister, $mem$$Address); 3209 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3210 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3211 %} 3212 ins_pipe( pipe_slow ); 3213 %} 3214 3215 instruct Repl2F_mem(vecD dst, memory mem) %{ 3216 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3217 match(Set dst (ReplicateF (LoadF mem))); 3218 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3219 ins_encode %{ 3220 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3221 %} 3222 ins_pipe( pipe_slow ); 3223 %} 3224 3225 instruct Repl4F_mem(vecX dst, memory mem) %{ 3226 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3227 match(Set dst (ReplicateF (LoadF mem))); 3228 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3229 ins_encode %{ 3230 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3231 %} 3232 ins_pipe( pipe_slow ); 3233 %} 3234 3235 instruct Repl8F(vecY dst, regF src) %{ 3236 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3237 match(Set dst (ReplicateF src)); 3238 format %{ "pshufd $dst,$src,0x00\n\t" 3239 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3240 ins_encode %{ 3241 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3242 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3243 %} 3244 ins_pipe( pipe_slow ); 3245 %} 3246 3247 instruct Repl8F_mem(vecY dst, memory mem) %{ 3248 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3249 match(Set dst (ReplicateF (LoadF mem))); 3250 format %{ "pshufd $dst,$mem,0x00\n\t" 3251 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3252 ins_encode %{ 3253 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3254 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3255 %} 3256 ins_pipe( pipe_slow ); 3257 %} 3258 3259 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3260 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3261 match(Set dst (ReplicateF zero)); 3262 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3263 ins_encode %{ 3264 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3265 %} 3266 ins_pipe( fpu_reg_reg ); 3267 %} 3268 3269 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3270 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3271 match(Set dst (ReplicateF zero)); 3272 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3273 ins_encode %{ 3274 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3275 %} 3276 ins_pipe( fpu_reg_reg ); 3277 %} 3278 3279 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3280 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3281 match(Set dst (ReplicateF zero)); 3282 format %{ "vxorps $dst,$dst,$dst\t! 
replicate8F zero" %} 3283 ins_encode %{ 3284 int vector_len = 1; 3285 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3286 %} 3287 ins_pipe( fpu_reg_reg ); 3288 %} 3289 3290 instruct Repl2D_mem(vecX dst, memory mem) %{ 3291 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3292 match(Set dst (ReplicateD (LoadD mem))); 3293 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3294 ins_encode %{ 3295 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3296 %} 3297 ins_pipe( pipe_slow ); 3298 %} 3299 3300 instruct Repl4D(vecY dst, regD src) %{ 3301 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3302 match(Set dst (ReplicateD src)); 3303 format %{ "pshufd $dst,$src,0x44\n\t" 3304 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3305 ins_encode %{ 3306 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3307 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3308 %} 3309 ins_pipe( pipe_slow ); 3310 %} 3311 3312 instruct Repl4D_mem(vecY dst, memory mem) %{ 3313 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3314 match(Set dst (ReplicateD (LoadD mem))); 3315 format %{ "pshufd $dst,$mem,0x44\n\t" 3316 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3317 ins_encode %{ 3318 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3319 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3320 %} 3321 ins_pipe( pipe_slow ); 3322 %} 3323 3324 // Replicate double (8 byte) scalar zero to be vector 3325 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3326 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3327 match(Set dst (ReplicateD zero)); 3328 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3329 ins_encode %{ 3330 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3331 %} 3332 ins_pipe( fpu_reg_reg ); 3333 %} 3334 3335 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3336 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3337 match(Set dst (ReplicateD zero)); 3338 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3339 ins_encode %{ 3340 int vector_len = 1; 3341 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3342 %} 3343 ins_pipe( fpu_reg_reg ); 3344 %} 3345 3346 // ====================GENERIC REPLICATE========================================== 3347 3348 // Replicate byte scalar to be vector 3349 instruct Repl4B(vecS dst, rRegI src) %{ 3350 predicate(n->as_Vector()->length() == 4); 3351 match(Set dst (ReplicateB src)); 3352 format %{ "movd $dst,$src\n\t" 3353 "punpcklbw $dst,$dst\n\t" 3354 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3355 ins_encode %{ 3356 __ movdl($dst$$XMMRegister, $src$$Register); 3357 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3358 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3359 %} 3360 ins_pipe( pipe_slow ); 3361 %} 3362 3363 instruct Repl8B(vecD dst, rRegI src) %{ 3364 predicate(n->as_Vector()->length() == 8); 3365 match(Set dst (ReplicateB src)); 3366 format %{ "movd $dst,$src\n\t" 3367 "punpcklbw $dst,$dst\n\t" 3368 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3369 ins_encode %{ 3370 __ movdl($dst$$XMMRegister, $src$$Register); 3371 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3372 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3373 %} 3374 ins_pipe( pipe_slow ); 3375 %} 3376 3377 // Replicate byte scalar immediate to be vector by loading from const table. 
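// As a worked example of the const-table trick used by the *_imm rules below (a sketch; replicate4_imm and replicate8_imm are the helpers this file already invokes in its ins_encode blocks): replicate8_imm(con, width) repeats the low `width` bytes of `con` across one 64-bit word, and replicate4_imm does the same for a 32-bit word. So for a byte constant 0x2A,
//   replicate4_imm(0x2A, 1) == 0x2A2A2A2A
//   replicate8_imm(0x2A, 1) == 0x2A2A2A2A2A2A2A2A
// and a single movdl/movq from [$constantaddress] materializes the replicated lanes without any shuffle instructions.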
3378 instruct Repl4B_imm(vecS dst, immI con) %{ 3379 predicate(n->as_Vector()->length() == 4); 3380 match(Set dst (ReplicateB con)); 3381 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3382 ins_encode %{ 3383 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3384 %} 3385 ins_pipe( pipe_slow ); 3386 %} 3387 3388 instruct Repl8B_imm(vecD dst, immI con) %{ 3389 predicate(n->as_Vector()->length() == 8); 3390 match(Set dst (ReplicateB con)); 3391 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3392 ins_encode %{ 3393 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3394 %} 3395 ins_pipe( pipe_slow ); 3396 %} 3397 3398 // Replicate byte scalar zero to be vector 3399 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3400 predicate(n->as_Vector()->length() == 4); 3401 match(Set dst (ReplicateB zero)); 3402 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3403 ins_encode %{ 3404 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3405 %} 3406 ins_pipe( fpu_reg_reg ); 3407 %} 3408 3409 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3410 predicate(n->as_Vector()->length() == 8); 3411 match(Set dst (ReplicateB zero)); 3412 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3413 ins_encode %{ 3414 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3415 %} 3416 ins_pipe( fpu_reg_reg ); 3417 %} 3418 3419 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3420 predicate(n->as_Vector()->length() == 16); 3421 match(Set dst (ReplicateB zero)); 3422 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3423 ins_encode %{ 3424 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3425 %} 3426 ins_pipe( fpu_reg_reg ); 3427 %} 3428 3429 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3430 predicate(n->as_Vector()->length() == 32); 3431 match(Set dst (ReplicateB zero)); 3432 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3433 ins_encode %{ 3434 // vpxor needs AVX2 to operate on 256-bit registers; plain AVX only provides vxorps/vxorpd at that width. 3435 int vector_len = 1; 3436 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3437 %} 3438 ins_pipe( fpu_reg_reg ); 3439 %} 3440 3441 // Replicate char/short (2 byte) scalar to be vector 3442 instruct Repl2S(vecS dst, rRegI src) %{ 3443 predicate(n->as_Vector()->length() == 2); 3444 match(Set dst (ReplicateS src)); 3445 format %{ "movd $dst,$src\n\t" 3446 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3447 ins_encode %{ 3448 __ movdl($dst$$XMMRegister, $src$$Register); 3449 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3450 %} 3451 ins_pipe( fpu_reg_reg ); 3452 %} 3453 3454 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3455 instruct Repl2S_imm(vecS dst, immI con) %{ 3456 predicate(n->as_Vector()->length() == 2); 3457 match(Set dst (ReplicateS con)); 3458 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3459 ins_encode %{ 3460 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3461 %} 3462 ins_pipe( fpu_reg_reg ); 3463 %} 3464 3465 instruct Repl4S_imm(vecD dst, immI con) %{ 3466 predicate(n->as_Vector()->length() == 4); 3467 match(Set dst (ReplicateS con)); 3468 format %{ "movq $dst,[$constantaddress]\t!
replicate4S($con)" %} 3469 ins_encode %{ 3470 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3471 %} 3472 ins_pipe( fpu_reg_reg ); 3473 %} 3474 3475 // Replicate char/short (2 byte) scalar zero to be vector 3476 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3477 predicate(n->as_Vector()->length() == 2); 3478 match(Set dst (ReplicateS zero)); 3479 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3480 ins_encode %{ 3481 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3482 %} 3483 ins_pipe( fpu_reg_reg ); 3484 %} 3485 3486 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3487 predicate(n->as_Vector()->length() == 4); 3488 match(Set dst (ReplicateS zero)); 3489 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3490 ins_encode %{ 3491 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3492 %} 3493 ins_pipe( fpu_reg_reg ); 3494 %} 3495 3496 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3497 predicate(n->as_Vector()->length() == 8); 3498 match(Set dst (ReplicateS zero)); 3499 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3500 ins_encode %{ 3501 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3502 %} 3503 ins_pipe( fpu_reg_reg ); 3504 %} 3505 3506 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3507 predicate(n->as_Vector()->length() == 16); 3508 match(Set dst (ReplicateS zero)); 3509 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3510 ins_encode %{ 3511 // vpxor needs AVX2 to operate on 256-bit registers; plain AVX only provides vxorps/vxorpd at that width. 3512 int vector_len = 1; 3513 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3514 %} 3515 ins_pipe( fpu_reg_reg ); 3516 %} 3517 3518 // Replicate integer (4 byte) scalar to be vector 3519 instruct Repl2I(vecD dst, rRegI src) %{ 3520 predicate(n->as_Vector()->length() == 2); 3521 match(Set dst (ReplicateI src)); 3522 format %{ "movd $dst,$src\n\t" 3523 "pshufd $dst,$dst,0x00\t! replicate2I" %} 3524 ins_encode %{ 3525 __ movdl($dst$$XMMRegister, $src$$Register); 3526 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3527 %} 3528 ins_pipe( fpu_reg_reg ); 3529 %} 3530 3531 // Integer could be loaded into xmm register directly from memory. 3532 instruct Repl2I_mem(vecD dst, memory mem) %{ 3533 predicate(n->as_Vector()->length() == 2); 3534 match(Set dst (ReplicateI (LoadI mem))); 3535 format %{ "movd $dst,$mem\n\t" 3536 "pshufd $dst,$dst,0x00\t! replicate2I" %} 3537 ins_encode %{ 3538 __ movdl($dst$$XMMRegister, $mem$$Address); 3539 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3540 %} 3541 ins_pipe( fpu_reg_reg ); 3542 %} 3543 3544 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 3545 instruct Repl2I_imm(vecD dst, immI con) %{ 3546 predicate(n->as_Vector()->length() == 2); 3547 match(Set dst (ReplicateI con)); 3548 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 3549 ins_encode %{ 3550 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3551 %} 3552 ins_pipe( fpu_reg_reg ); 3553 %} 3554 3555 // Replicate integer (4 byte) scalar zero to be vector 3556 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 3557 predicate(n->as_Vector()->length() == 2); 3558 match(Set dst (ReplicateI zero)); 3559 format %{ "pxor $dst,$dst\t!
replicate2I zero" %} 3560 ins_encode %{ 3561 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3562 %} 3563 ins_pipe( fpu_reg_reg ); 3564 %} 3565 3566 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 3567 predicate(n->as_Vector()->length() == 4); 3568 match(Set dst (ReplicateI zero)); 3569 format %{ "pxor $dst,$dst\t! replicate4I zero" %} 3570 ins_encode %{ 3571 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3572 %} 3573 ins_pipe( fpu_reg_reg ); 3574 %} 3575 3576 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 3577 predicate(n->as_Vector()->length() == 8); 3578 match(Set dst (ReplicateI zero)); 3579 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 3580 ins_encode %{ 3581 // vpxor needs AVX2 to operate on 256-bit registers; plain AVX only provides vxorps/vxorpd at that width. 3582 int vector_len = 1; 3583 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3584 %} 3585 ins_pipe( fpu_reg_reg ); 3586 %} 3587 3588 // Replicate long (8 byte) scalar to be vector 3589 #ifdef _LP64 3590 instruct Repl2L(vecX dst, rRegL src) %{ 3591 predicate(n->as_Vector()->length() == 2); 3592 match(Set dst (ReplicateL src)); 3593 format %{ "movdq $dst,$src\n\t" 3594 "punpcklqdq $dst,$dst\t! replicate2L" %} 3595 ins_encode %{ 3596 __ movdq($dst$$XMMRegister, $src$$Register); 3597 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3598 %} 3599 ins_pipe( pipe_slow ); 3600 %} 3601 #else // _LP64 3602 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 3603 predicate(n->as_Vector()->length() == 2); 3604 match(Set dst (ReplicateL src)); 3605 effect(TEMP dst, USE src, TEMP tmp); 3606 format %{ "movdl $dst,$src.lo\n\t" 3607 "movdl $tmp,$src.hi\n\t" 3608 "punpckldq $dst,$tmp\n\t" 3609 "punpcklqdq $dst,$dst\t! replicate2L" %} 3610 ins_encode %{ 3611 __ movdl($dst$$XMMRegister, $src$$Register); 3612 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3613 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3614 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3615 %} 3616 ins_pipe( pipe_slow ); 3617 %} 3618 #endif // _LP64 3619 3620 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3621 instruct Repl2L_imm(vecX dst, immL con) %{ 3622 predicate(n->as_Vector()->length() == 2); 3623 match(Set dst (ReplicateL con)); 3624 format %{ "movq $dst,[$constantaddress]\n\t" 3625 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 3626 ins_encode %{ 3627 __ movq($dst$$XMMRegister, $constantaddress($con)); 3628 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3629 %} 3630 ins_pipe( pipe_slow ); 3631 %} 3632 3633 // Replicate long (8 byte) scalar zero to be vector 3634 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3635 predicate(n->as_Vector()->length() == 2); 3636 match(Set dst (ReplicateL zero)); 3637 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3638 ins_encode %{ 3639 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3640 %} 3641 ins_pipe( fpu_reg_reg ); 3642 %} 3643 3644 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3645 predicate(n->as_Vector()->length() == 4); 3646 match(Set dst (ReplicateL zero)); 3647 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 3648 ins_encode %{ 3649 // vpxor needs AVX2 to operate on 256-bit registers; plain AVX only provides vxorps/vxorpd at that width.
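// The vector_len argument threaded through these encodings selects the operand width: the rules in this file consistently pair vector_len 0 with 128-bit (vecS/vecD/vecX), 1 with 256-bit (vecY) and 2 with 512-bit (vecZ) operands. Here a single 256-bit vpxor clears the whole vecY register.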
3650 int vector_len = 1; 3651 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3652 %} 3653 ins_pipe( fpu_reg_reg ); 3654 %} 3655 3656 // Replicate float (4 byte) scalar to be vector 3657 instruct Repl2F(vecD dst, regF src) %{ 3658 predicate(n->as_Vector()->length() == 2); 3659 match(Set dst (ReplicateF src)); 3660 format %{ "pshufd $dst,$src,0x00\t! replicate2F" %} 3661 ins_encode %{ 3662 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3663 %} 3664 ins_pipe( fpu_reg_reg ); 3665 %} 3666 3667 instruct Repl4F(vecX dst, regF src) %{ 3668 predicate(n->as_Vector()->length() == 4); 3669 match(Set dst (ReplicateF src)); 3670 format %{ "pshufd $dst,$src,0x00\t! replicate4F" %} 3671 ins_encode %{ 3672 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3673 %} 3674 ins_pipe( pipe_slow ); 3675 %} 3676 3677 // Replicate double (8 byte) scalar to be vector 3678 instruct Repl2D(vecX dst, regD src) %{ 3679 predicate(n->as_Vector()->length() == 2); 3680 match(Set dst (ReplicateD src)); 3681 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 3682 ins_encode %{ 3683 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3684 %} 3685 ins_pipe( pipe_slow ); 3686 %} 3687 3688 // ====================EVEX REPLICATE============================================= 3689 3690 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 3691 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3692 match(Set dst (ReplicateB (LoadB mem))); 3693 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 3694 ins_encode %{ 3695 int vector_len = 0; 3696 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3697 %} 3698 ins_pipe( pipe_slow ); 3699 %} 3700 3701 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 3702 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3703 match(Set dst (ReplicateB (LoadB mem))); 3704 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 3705 ins_encode %{ 3706 int vector_len = 0; 3707 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3708 %} 3709 ins_pipe( pipe_slow ); 3710 %} 3711 3712 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3713 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3714 match(Set dst (ReplicateB src)); 3715 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3716 ins_encode %{ 3717 int vector_len = 0; 3718 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3719 %} 3720 ins_pipe( pipe_slow ); 3721 %} 3722 3723 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3724 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3725 match(Set dst (ReplicateB (LoadB mem))); 3726 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 3727 ins_encode %{ 3728 int vector_len = 0; 3729 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3730 %} 3731 ins_pipe( pipe_slow ); 3732 %} 3733 3734 instruct Repl32B_evex(vecY dst, rRegI src) %{ 3735 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3736 match(Set dst (ReplicateB src)); 3737 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 3738 ins_encode %{ 3739 int vector_len = 1; 3740 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3741 %} 3742 ins_pipe( pipe_slow ); 3743 %} 3744 3745 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 3746 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3747 match(Set dst (ReplicateB (LoadB mem))); 3748 format %{ "vpbroadcastb $dst,$mem\t!
replicate32B" %} 3749 ins_encode %{ 3750 int vector_len = 1; 3751 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3752 %} 3753 ins_pipe( pipe_slow ); 3754 %} 3755 3756 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3757 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3758 match(Set dst (ReplicateB src)); 3759 format %{ "vpbroadcastb $dst,$src\t! replicate64B" %} 3760 ins_encode %{ 3761 int vector_len = 2; 3762 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3763 %} 3764 ins_pipe( pipe_slow ); 3765 %} 3766 3767 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3768 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3769 match(Set dst (ReplicateB (LoadB mem))); 3770 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3771 ins_encode %{ 3772 int vector_len = 2; 3773 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3774 %} 3775 ins_pipe( pipe_slow ); 3776 %} 3777 3778 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3779 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3780 match(Set dst (ReplicateB con)); 3781 format %{ "movq $dst,[$constantaddress]\n\t" 3782 "vpbroadcastb $dst,$dst\t! replicate16B" %} 3783 ins_encode %{ 3784 int vector_len = 0; 3785 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3786 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3787 %} 3788 ins_pipe( pipe_slow ); 3789 %} 3790 3791 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3792 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3793 match(Set dst (ReplicateB con)); 3794 format %{ "movq $dst,[$constantaddress]\n\t" 3795 "vpbroadcastb $dst,$dst\t! replicate32B" %} 3796 ins_encode %{ 3797 int vector_len = 1; 3798 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3799 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3800 %} 3801 ins_pipe( pipe_slow ); 3802 %} 3803 3804 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 3805 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3806 match(Set dst (ReplicateB con)); 3807 format %{ "movq $dst,[$constantaddress]\n\t" 3808 "vpbroadcastb $dst,$dst\t! replicate64B" %} 3809 ins_encode %{ 3810 int vector_len = 2; 3811 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3812 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3813 %} 3814 ins_pipe( pipe_slow ); 3815 %} 3816 3817 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 3818 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3819 match(Set dst (ReplicateB zero)); 3820 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 3821 ins_encode %{ 3822 // vpxor needs EVEX (AVX512F) to operate on 512-bit registers; the UseAVX > 2 predicate guarantees that. 3823 int vector_len = 2; 3824 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3825 %} 3826 ins_pipe( fpu_reg_reg ); 3827 %} 3828 3829 instruct Repl4S_evex(vecD dst, rRegI src) %{ 3830 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3831 match(Set dst (ReplicateS src)); 3832 format %{ "vpbroadcastw $dst,$src\t!
replicate4S" %} 3833 ins_encode %{ 3834 int vector_len = 0; 3835 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3836 %} 3837 ins_pipe( pipe_slow ); 3838 %} 3839 3840 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 3841 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3842 match(Set dst (ReplicateS (LoadS mem))); 3843 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 3844 ins_encode %{ 3845 int vector_len = 0; 3846 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3847 %} 3848 ins_pipe( pipe_slow ); 3849 %} 3850 3851 instruct Repl8S_evex(vecX dst, rRegI src) %{ 3852 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3853 match(Set dst (ReplicateS src)); 3854 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 3855 ins_encode %{ 3856 int vector_len = 0; 3857 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3858 %} 3859 ins_pipe( pipe_slow ); 3860 %} 3861 3862 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 3863 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3864 match(Set dst (ReplicateS (LoadS mem))); 3865 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 3866 ins_encode %{ 3867 int vector_len = 0; 3868 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3869 %} 3870 ins_pipe( pipe_slow ); 3871 %} 3872 3873 instruct Repl16S_evex(vecY dst, rRegI src) %{ 3874 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3875 match(Set dst (ReplicateS src)); 3876 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 3877 ins_encode %{ 3878 int vector_len = 1; 3879 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3880 %} 3881 ins_pipe( pipe_slow ); 3882 %} 3883 3884 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 3885 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3886 match(Set dst (ReplicateS (LoadS mem))); 3887 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 3888 ins_encode %{ 3889 int vector_len = 1; 3890 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3891 %} 3892 ins_pipe( pipe_slow ); 3893 %} 3894 3895 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 3896 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3897 match(Set dst (ReplicateS src)); 3898 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 3899 ins_encode %{ 3900 int vector_len = 2; 3901 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3902 %} 3903 ins_pipe( pipe_slow ); 3904 %} 3905 3906 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 3907 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3908 match(Set dst (ReplicateS (LoadS mem))); 3909 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 3910 ins_encode %{ 3911 int vector_len = 2; 3912 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3913 %} 3914 ins_pipe( pipe_slow ); 3915 %} 3916 3917 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 3918 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3919 match(Set dst (ReplicateS con)); 3920 format %{ "movq $dst,[$constantaddress]\n\t" 3921 "vpbroadcastw $dst,$dst\t! 
replicate8S" %} 3922 ins_encode %{ 3923 int vector_len = 0; 3924 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3925 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3926 %} 3927 ins_pipe( pipe_slow ); 3928 %} 3929 3930 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 3931 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3932 match(Set dst (ReplicateS con)); 3933 format %{ "movq $dst,[$constantaddress]\n\t" 3934 "vpbroadcastw $dst,$dst\t! replicate16S" %} 3935 ins_encode %{ 3936 int vector_len = 1; 3937 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3938 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3939 %} 3940 ins_pipe( pipe_slow ); 3941 %} 3942 3943 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 3944 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3945 match(Set dst (ReplicateS con)); 3946 format %{ "movq $dst,[$constantaddress]\n\t" 3947 "vpbroadcastw $dst,$dst\t! replicate32S" %} 3948 ins_encode %{ 3949 int vector_len = 2; 3950 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3951 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3952 %} 3953 ins_pipe( pipe_slow ); 3954 %} 3955 3956 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 3957 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3958 match(Set dst (ReplicateS zero)); 3959 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 3960 ins_encode %{ 3961 // vpxor needs EVEX (AVX512F) to operate on 512-bit registers; the UseAVX > 2 predicate guarantees that. 3962 int vector_len = 2; 3963 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3964 %} 3965 ins_pipe( fpu_reg_reg ); 3966 %} 3967 3968 instruct Repl4I_evex(vecX dst, rRegI src) %{ 3969 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 3970 match(Set dst (ReplicateI src)); 3971 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 3972 ins_encode %{ 3973 int vector_len = 0; 3974 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 3975 %} 3976 ins_pipe( pipe_slow ); 3977 %} 3978 3979 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 3980 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 3981 match(Set dst (ReplicateI (LoadI mem))); 3982 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 3983 ins_encode %{ 3984 int vector_len = 0; 3985 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 3986 %} 3987 ins_pipe( pipe_slow ); 3988 %} 3989 3990 instruct Repl8I_evex(vecY dst, rRegI src) %{ 3991 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 3992 match(Set dst (ReplicateI src)); 3993 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 3994 ins_encode %{ 3995 int vector_len = 1; 3996 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 3997 %} 3998 ins_pipe( pipe_slow ); 3999 %} 4000 4001 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4002 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4003 match(Set dst (ReplicateI (LoadI mem))); 4004 format %{ "vpbroadcastd $dst,$mem\t!
replicate8I" %} 4005 ins_encode %{ 4006 int vector_len = 1; 4007 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4008 %} 4009 ins_pipe( pipe_slow ); 4010 %} 4011 4012 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4013 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4014 match(Set dst (ReplicateI src)); 4015 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4016 ins_encode %{ 4017 int vector_len = 2; 4018 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4019 %} 4020 ins_pipe( pipe_slow ); 4021 %} 4022 4023 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4024 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4025 match(Set dst (ReplicateI (LoadI mem))); 4026 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4027 ins_encode %{ 4028 int vector_len = 2; 4029 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4030 %} 4031 ins_pipe( pipe_slow ); 4032 %} 4033 4034 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4035 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4036 match(Set dst (ReplicateI con)); 4037 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 4038 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4039 ins_encode %{ 4040 int vector_len = 0; 4041 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4042 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4043 %} 4044 ins_pipe( pipe_slow ); 4045 %} 4046 4047 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4048 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4049 match(Set dst (ReplicateI con)); 4050 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4051 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4052 ins_encode %{ 4053 int vector_len = 1; 4054 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4055 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4056 %} 4057 ins_pipe( pipe_slow ); 4058 %} 4059 4060 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4061 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4062 match(Set dst (ReplicateI con)); 4063 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4064 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4065 ins_encode %{ 4066 int vector_len = 2; 4067 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4068 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4069 %} 4070 ins_pipe( pipe_slow ); 4071 %} 4072 4073 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4074 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4075 match(Set dst (ReplicateI zero)); 4076 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4077 ins_encode %{ 4078 // vpxor needs EVEX (AVX512F) to operate on 512-bit registers; the UseAVX > 2 predicate guarantees that. 4079 int vector_len = 2; 4080 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4081 %} 4082 ins_pipe( fpu_reg_reg ); 4083 %} 4084 4085 // Replicate long (8 byte) scalar to be vector 4086 #ifdef _LP64 4087 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4088 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4089 match(Set dst (ReplicateL src)); 4090 format %{ "vpbroadcastq $dst,$src\t!
replicate4L" %} 4091 ins_encode %{ 4092 int vector_len = 1; 4093 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4094 %} 4095 ins_pipe( pipe_slow ); 4096 %} 4097 4098 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4099 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4100 match(Set dst (ReplicateL src)); 4101 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4102 ins_encode %{ 4103 int vector_len = 2; 4104 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4105 %} 4106 ins_pipe( pipe_slow ); 4107 %} 4108 #else // _LP64 4109 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4110 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4111 match(Set dst (ReplicateL src)); 4112 effect(TEMP dst, USE src, TEMP tmp); 4113 format %{ "movdl $dst,$src.lo\n\t" 4114 "movdl $tmp,$src.hi\n\t" 4115 "punpckldq $dst,$tmp\n\t" 4116 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4117 ins_encode %{ 4118 int vector_len = 1; 4119 __ movdl($dst$$XMMRegister, $src$$Register); 4120 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4121 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4122 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4123 %} 4124 ins_pipe( pipe_slow ); 4125 %} 4126 4127 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4128 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4129 match(Set dst (ReplicateL src)); 4130 effect(TEMP dst, USE src, TEMP tmp); 4131 format %{ "movdl $dst,$src.lo\n\t" 4132 "movdl $tmp,$src.hi\n\t" 4133 "punpckldq $dst,$tmp\n\t" 4134 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4135 ins_encode %{ 4136 int vector_len = 2; 4137 __ movdl($dst$$XMMRegister, $src$$Register); 4138 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4139 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4140 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4141 %} 4142 ins_pipe( pipe_slow ); 4143 %} 4144 #endif // _LP64 4145 4146 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4147 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4148 match(Set dst (ReplicateL con)); 4149 format %{ "movq $dst,[$constantaddress]\n\t" 4150 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4151 ins_encode %{ 4152 int vector_len = 1; 4153 __ movq($dst$$XMMRegister, $constantaddress($con)); 4154 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4155 %} 4156 ins_pipe( pipe_slow ); 4157 %} 4158 4159 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4160 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4161 match(Set dst (ReplicateL con)); 4162 format %{ "movq $dst,[$constantaddress]\n\t" 4163 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4164 ins_encode %{ 4165 int vector_len = 2; 4166 __ movq($dst$$XMMRegister, $constantaddress($con)); 4167 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4168 %} 4169 ins_pipe( pipe_slow ); 4170 %} 4171 4172 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4173 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4174 match(Set dst (ReplicateL (LoadL mem))); 4175 format %{ "vpbroadcastq $dst,$mem\t!
replicate2L" %} 4176 ins_encode %{ 4177 int vector_len = 0; 4178 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4179 %} 4180 ins_pipe( pipe_slow ); 4181 %} 4182 4183 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4184 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4185 match(Set dst (ReplicateL (LoadL mem))); 4186 format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %} 4187 ins_encode %{ 4188 int vector_len = 1; 4189 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4190 %} 4191 ins_pipe( pipe_slow ); 4192 %} 4193 4194 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4195 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4196 match(Set dst (ReplicateL (LoadL mem))); 4197 format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %} 4198 ins_encode %{ 4199 int vector_len = 2; 4200 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4201 %} 4202 ins_pipe( pipe_slow ); 4203 %} 4204 4205 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4206 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4207 match(Set dst (ReplicateL zero)); 4208 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4209 ins_encode %{ 4210 // vpxor needs EVEX (AVX512F) to operate on 512-bit registers; the UseAVX > 2 predicate guarantees that. 4211 int vector_len = 2; 4212 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4213 %} 4214 ins_pipe( fpu_reg_reg ); 4215 %} 4216 4217 instruct Repl8F_evex(vecY dst, regF src) %{ 4218 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4219 match(Set dst (ReplicateF src)); 4220 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4221 ins_encode %{ 4222 int vector_len = 1; 4223 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4224 %} 4225 ins_pipe( pipe_slow ); 4226 %} 4227 4228 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4229 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4230 match(Set dst (ReplicateF (LoadF mem))); 4231 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4232 ins_encode %{ 4233 int vector_len = 1; 4234 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4235 %} 4236 ins_pipe( pipe_slow ); 4237 %} 4238 4239 instruct Repl16F_evex(vecZ dst, regF src) %{ 4240 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4241 match(Set dst (ReplicateF src)); 4242 format %{ "vbroadcastss $dst,$src\t! replicate16F" %} 4243 ins_encode %{ 4244 int vector_len = 2; 4245 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4246 %} 4247 ins_pipe( pipe_slow ); 4248 %} 4249 4250 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4251 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4252 match(Set dst (ReplicateF (LoadF mem))); 4253 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4254 ins_encode %{ 4255 int vector_len = 2; 4256 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4257 %} 4258 ins_pipe( pipe_slow ); 4259 %} 4260 4261 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4262 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4263 match(Set dst (ReplicateF zero)); 4264 format %{ "vpxor $dst k0,$dst,$dst\t!
replicate2F zero" %} 4265 ins_encode %{ 4266 // Use vpxor in place of vxorps since EVEX-encoded vxorps requires AVX512DQ: this is a 512-bit operation 4267 int vector_len = 2; 4268 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4269 %} 4270 ins_pipe( fpu_reg_reg ); 4271 %} 4272 4273 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4274 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4275 match(Set dst (ReplicateF zero)); 4276 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4277 ins_encode %{ 4278 // Use vpxor in place of vxorps since EVEX-encoded vxorps requires AVX512DQ: this is a 512-bit operation 4279 int vector_len = 2; 4280 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4281 %} 4282 ins_pipe( fpu_reg_reg ); 4283 %} 4284 4285 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4286 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4287 match(Set dst (ReplicateF zero)); 4288 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4289 ins_encode %{ 4290 // Use vpxor in place of vxorps since EVEX-encoded vxorps requires AVX512DQ: this is a 512-bit operation 4291 int vector_len = 2; 4292 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4293 %} 4294 ins_pipe( fpu_reg_reg ); 4295 %} 4296 4297 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4298 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4299 match(Set dst (ReplicateF zero)); 4300 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4301 ins_encode %{ 4302 // Use vpxor in place of vxorps since EVEX-encoded vxorps requires AVX512DQ: this is a 512-bit operation 4303 int vector_len = 2; 4304 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4305 %} 4306 ins_pipe( fpu_reg_reg ); 4307 %} 4308 4309 instruct Repl4D_evex(vecY dst, regD src) %{ 4310 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4311 match(Set dst (ReplicateD src)); 4312 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4313 ins_encode %{ 4314 int vector_len = 1; 4315 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4316 %} 4317 ins_pipe( pipe_slow ); 4318 %} 4319 4320 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4321 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4322 match(Set dst (ReplicateD (LoadD mem))); 4323 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4324 ins_encode %{ 4325 int vector_len = 1; 4326 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4327 %} 4328 ins_pipe( pipe_slow ); 4329 %} 4330 4331 instruct Repl8D_evex(vecZ dst, regD src) %{ 4332 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4333 match(Set dst (ReplicateD src)); 4334 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4335 ins_encode %{ 4336 int vector_len = 2; 4337 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4338 %} 4339 ins_pipe( pipe_slow ); 4340 %} 4341 4342 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4343 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4344 match(Set dst (ReplicateD (LoadD mem))); 4345 format %{ "vbroadcastsd $dst,$mem\t!
replicate8D" %} 4346 ins_encode %{ 4347 int vector_len = 2; 4348 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4349 %} 4350 ins_pipe( pipe_slow ); 4351 %} 4352 4353 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4354 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4355 match(Set dst (ReplicateD zero)); 4356 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4357 ins_encode %{ 4358 // Use vpxor in place of vxorpd since EVEX-encoded vxorpd requires AVX512DQ: this is a 512-bit operation 4359 int vector_len = 2; 4360 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4361 %} 4362 ins_pipe( fpu_reg_reg ); 4363 %} 4364 4365 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4366 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4367 match(Set dst (ReplicateD zero)); 4368 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4369 ins_encode %{ 4370 // Use vpxor in place of vxorpd since EVEX-encoded vxorpd requires AVX512DQ: this is a 512-bit operation 4371 int vector_len = 2; 4372 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4373 %} 4374 ins_pipe( fpu_reg_reg ); 4375 %} 4376 4377 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4378 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4379 match(Set dst (ReplicateD zero)); 4380 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4381 ins_encode %{ 4382 // Use vpxor in place of vxorpd since EVEX-encoded vxorpd requires AVX512DQ: this is a 512-bit operation 4383 int vector_len = 2; 4384 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4385 %} 4386 ins_pipe( fpu_reg_reg ); 4387 %} 4388 4389 // ====================REDUCTION ARITHMETIC======================================= 4390 4391 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4392 predicate(UseSSE > 2 && UseAVX == 0); 4393 match(Set dst (AddReductionVI src1 src2)); 4394 effect(TEMP tmp2, TEMP tmp); 4395 format %{ "movdqu $tmp2,$src2\n\t" 4396 "phaddd $tmp2,$tmp2\n\t" 4397 "movd $tmp,$src1\n\t" 4398 "paddd $tmp,$tmp2\n\t" 4399 "movd $dst,$tmp\t! add reduction2I" %} 4400 ins_encode %{ 4401 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4402 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4403 __ movdl($tmp$$XMMRegister, $src1$$Register); 4404 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4405 __ movdl($dst$$Register, $tmp$$XMMRegister); 4406 %} 4407 ins_pipe( pipe_slow ); 4408 %} 4409 4410 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4411 predicate(VM_Version::supports_avxonly()); 4412 match(Set dst (AddReductionVI src1 src2)); 4413 effect(TEMP tmp, TEMP tmp2); 4414 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4415 "movd $tmp2,$src1\n\t" 4416 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4417 "movd $dst,$tmp2\t!
add reduction2I" %} 4418 ins_encode %{ 4419 int vector_len = 0; 4420 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4421 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4422 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4423 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4424 %} 4425 ins_pipe( pipe_slow ); 4426 %} 4427 4428 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4429 predicate(UseAVX > 2); 4430 match(Set dst (AddReductionVI src1 src2)); 4431 effect(TEMP tmp, TEMP tmp2); 4432 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4433 "vpaddd $tmp,$src2,$tmp2\n\t" 4434 "movd $tmp2,$src1\n\t" 4435 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4436 "movd $dst,$tmp2\t! add reduction2I" %} 4437 ins_encode %{ 4438 int vector_len = 0; 4439 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4440 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4441 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4442 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4443 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4444 %} 4445 ins_pipe( pipe_slow ); 4446 %} 4447 4448 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4449 predicate(UseSSE > 2 && UseAVX == 0); 4450 match(Set dst (AddReductionVI src1 src2)); 4451 effect(TEMP tmp, TEMP tmp2); 4452 format %{ "movdqu $tmp,$src2\n\t" 4453 "phaddd $tmp,$tmp\n\t" 4454 "phaddd $tmp,$tmp\n\t" 4455 "movd $tmp2,$src1\n\t" 4456 "paddd $tmp2,$tmp\n\t" 4457 "movd $dst,$tmp2\t! add reduction4I" %} 4458 ins_encode %{ 4459 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4460 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4461 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4462 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4463 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4464 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4465 %} 4466 ins_pipe( pipe_slow ); 4467 %} 4468 4469 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4470 predicate(VM_Version::supports_avxonly()); 4471 match(Set dst (AddReductionVI src1 src2)); 4472 effect(TEMP tmp, TEMP tmp2); 4473 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4474 "vphaddd $tmp,$tmp,$tmp\n\t" 4475 "movd $tmp2,$src1\n\t" 4476 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4477 "movd $dst,$tmp2\t! add reduction4I" %} 4478 ins_encode %{ 4479 int vector_len = 0; 4480 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4481 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4482 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4483 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4484 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4485 %} 4486 ins_pipe( pipe_slow ); 4487 %} 4488 4489 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4490 predicate(UseAVX > 2); 4491 match(Set dst (AddReductionVI src1 src2)); 4492 effect(TEMP tmp, TEMP tmp2); 4493 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4494 "vpaddd $tmp,$src2,$tmp2\n\t" 4495 "pshufd $tmp2,$tmp,0x1\n\t" 4496 "vpaddd $tmp,$tmp,$tmp2\n\t" 4497 "movd $tmp2,$src1\n\t" 4498 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4499 "movd $dst,$tmp2\t! 
add reduction4I" %} 4500 ins_encode %{ 4501 int vector_len = 0; 4502 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4503 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4504 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4505 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4506 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4507 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4508 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4509 %} 4510 ins_pipe( pipe_slow ); 4511 %} 4512 4513 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4514 predicate(VM_Version::supports_avxonly()); 4515 match(Set dst (AddReductionVI src1 src2)); 4516 effect(TEMP tmp, TEMP tmp2); 4517 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4518 "vphaddd $tmp,$tmp,$tmp2\n\t" 4519 "vextracti128_high $tmp2,$tmp\n\t" 4520 "vpaddd $tmp,$tmp,$tmp2\n\t" 4521 "movd $tmp2,$src1\n\t" 4522 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4523 "movd $dst,$tmp2\t! add reduction8I" %} 4524 ins_encode %{ 4525 int vector_len = 1; 4526 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4527 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4528 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4529 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4530 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4531 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4532 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4533 %} 4534 ins_pipe( pipe_slow ); 4535 %} 4536 4537 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4538 predicate(UseAVX > 2); 4539 match(Set dst (AddReductionVI src1 src2)); 4540 effect(TEMP tmp, TEMP tmp2); 4541 format %{ "vextracti128_high $tmp,$src2\n\t" 4542 "vpaddd $tmp,$tmp,$src2\n\t" 4543 "pshufd $tmp2,$tmp,0xE\n\t" 4544 "vpaddd $tmp,$tmp,$tmp2\n\t" 4545 "pshufd $tmp2,$tmp,0x1\n\t" 4546 "vpaddd $tmp,$tmp,$tmp2\n\t" 4547 "movd $tmp2,$src1\n\t" 4548 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4549 "movd $dst,$tmp2\t! add reduction8I" %} 4550 ins_encode %{ 4551 int vector_len = 0; 4552 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4553 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4554 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4555 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4556 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4557 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4558 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4559 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4560 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4561 %} 4562 ins_pipe( pipe_slow ); 4563 %} 4564 4565 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4566 predicate(UseAVX > 2); 4567 match(Set dst (AddReductionVI src1 src2)); 4568 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4569 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4570 "vpaddd $tmp3,$tmp3,$src2\n\t" 4571 "vextracti128_high $tmp,$tmp3\n\t" 4572 "vpaddd $tmp,$tmp,$tmp3\n\t" 4573 "pshufd $tmp2,$tmp,0xE\n\t" 4574 "vpaddd $tmp,$tmp,$tmp2\n\t" 4575 "pshufd $tmp2,$tmp,0x1\n\t" 4576 "vpaddd $tmp,$tmp,$tmp2\n\t" 4577 "movd $tmp2,$src1\n\t" 4578 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4579 "movd $dst,$tmp2\t! 
add reduction16I" %} 4580 ins_encode %{ 4581 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 4582 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4583 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 4584 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4585 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4586 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4587 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4588 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4589 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4590 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4591 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4592 %} 4593 ins_pipe( pipe_slow ); 4594 %} 4595 4596 #ifdef _LP64 4597 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4598 predicate(UseAVX > 2); 4599 match(Set dst (AddReductionVL src1 src2)); 4600 effect(TEMP tmp, TEMP tmp2); 4601 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4602 "vpaddq $tmp,$src2,$tmp2\n\t" 4603 "movdq $tmp2,$src1\n\t" 4604 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4605 "movdq $dst,$tmp2\t! add reduction2L" %} 4606 ins_encode %{ 4607 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4608 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4609 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4610 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4611 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4612 %} 4613 ins_pipe( pipe_slow ); 4614 %} 4615 4616 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4617 predicate(UseAVX > 2); 4618 match(Set dst (AddReductionVL src1 src2)); 4619 effect(TEMP tmp, TEMP tmp2); 4620 format %{ "vextracti128_high $tmp,$src2\n\t" 4621 "vpaddq $tmp2,$tmp,$src2\n\t" 4622 "pshufd $tmp,$tmp2,0xE\n\t" 4623 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4624 "movdq $tmp,$src1\n\t" 4625 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4626 "movdq $dst,$tmp2\t! add reduction4L" %} 4627 ins_encode %{ 4628 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4629 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4630 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4631 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4632 __ movdq($tmp$$XMMRegister, $src1$$Register); 4633 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4634 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4635 %} 4636 ins_pipe( pipe_slow ); 4637 %} 4638 4639 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4640 predicate(UseAVX > 2); 4641 match(Set dst (AddReductionVL src1 src2)); 4642 effect(TEMP tmp, TEMP tmp2); 4643 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 4644 "vpaddq $tmp2,$tmp2,$src2\n\t" 4645 "vextracti128_high $tmp,$tmp2\n\t" 4646 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4647 "pshufd $tmp,$tmp2,0xE\n\t" 4648 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4649 "movdq $tmp,$src1\n\t" 4650 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4651 "movdq $dst,$tmp2\t! add reduction8L" %} 4652 ins_encode %{ 4653 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4654 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4655 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 4656 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4657 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4658 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4659 __ movdq($tmp$$XMMRegister, $src1$$Register); 4660 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4661 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4662 %} 4663 ins_pipe( pipe_slow ); 4664 %} 4665 #endif 4666
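// Note: an illustrative sketch (not generated code) of what the integer
// AddReductionVI/VL rules above compute: the scalar input src1 plus the sum
// of all lanes of the vector input src2, i.e. roughly
//
//   int add_reduction_vi(int src1, const int* src2, int len) {
//     int sum = src1;                              // scalar accumulator arrives in src1
//     for (int i = 0; i < len; i++) sum += src2[i]; // fold every vector lane
//     return sum;                                  // moved back to a GPR via movd/movdq
//   }
//
// Integer addition is associative, so the lanes are folded pairwise: extract
// the high half (vextracti128_high / vextracti64x4_high), add it onto the low
// half, then shuffle with pshufd (imm 0xE moves the upper 64 bits of a 128-bit
// value down; imm 0x1 moves lane 1 into lane 0) until a single lane remains.
// The FP reductions that follow instead emit one strictly-ordered chain of
// addss/addsd over the lanes, because FP addition is not associative and the
// sequential (lane-order) result must be preserved.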
4667 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4668 predicate(UseSSE >= 1 && UseAVX == 0); 4669 match(Set dst (AddReductionVF dst src2)); 4670 effect(TEMP dst, TEMP tmp); 4671 format %{ "addss $dst,$src2\n\t" 4672 "pshufd $tmp,$src2,0x01\n\t" 4673 "addss $dst,$tmp\t! add reduction2F" %} 4674 ins_encode %{ 4675 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4676 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4677 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4678 %} 4679 ins_pipe( pipe_slow ); 4680 %} 4681 4682 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4683 predicate(UseAVX > 0); 4684 match(Set dst (AddReductionVF dst src2)); 4685 effect(TEMP dst, TEMP tmp); 4686 format %{ "vaddss $dst,$dst,$src2\n\t" 4687 "pshufd $tmp,$src2,0x01\n\t" 4688 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 4689 ins_encode %{ 4690 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4691 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4692 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4693 %} 4694 ins_pipe( pipe_slow ); 4695 %} 4696 4697 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4698 predicate(UseSSE >= 1 && UseAVX == 0); 4699 match(Set dst (AddReductionVF dst src2)); 4700 effect(TEMP dst, TEMP tmp); 4701 format %{ "addss $dst,$src2\n\t" 4702 "pshufd $tmp,$src2,0x01\n\t" 4703 "addss $dst,$tmp\n\t" 4704 "pshufd $tmp,$src2,0x02\n\t" 4705 "addss $dst,$tmp\n\t" 4706 "pshufd $tmp,$src2,0x03\n\t" 4707 "addss $dst,$tmp\t! add reduction4F" %} 4708 ins_encode %{ 4709 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4710 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4711 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4712 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4713 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4714 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4715 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4716 %} 4717 ins_pipe( pipe_slow ); 4718 %} 4719 4720 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4721 predicate(UseAVX > 0); 4722 match(Set dst (AddReductionVF dst src2)); 4723 effect(TEMP tmp, TEMP dst); 4724 format %{ "vaddss $dst,$dst,$src2\n\t" 4725 "pshufd $tmp,$src2,0x01\n\t" 4726 "vaddss $dst,$dst,$tmp\n\t" 4727 "pshufd $tmp,$src2,0x02\n\t" 4728 "vaddss $dst,$dst,$tmp\n\t" 4729 "pshufd $tmp,$src2,0x03\n\t" 4730 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 4731 ins_encode %{ 4732 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4733 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4734 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4735 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4736 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4737 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4738 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4739 %} 4740 ins_pipe( pipe_slow ); 4741 %} 4742 4743 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 4744 predicate(UseAVX > 0); 4745 match(Set dst (AddReductionVF dst src2)); 4746 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4747 format %{ "vaddss $dst,$dst,$src2\n\t" 4748 "pshufd $tmp,$src2,0x01\n\t" 4749 "vaddss $dst,$dst,$tmp\n\t" 4750 "pshufd $tmp,$src2,0x02\n\t" 4751 "vaddss $dst,$dst,$tmp\n\t" 4752 "pshufd $tmp,$src2,0x03\n\t" 4753 "vaddss $dst,$dst,$tmp\n\t" 4754 "vextractf128_high $tmp2,$src2\n\t" 4755 "vaddss $dst,$dst,$tmp2\n\t" 4756 "pshufd $tmp,$tmp2,0x01\n\t" 4757 "vaddss $dst,$dst,$tmp\n\t" 4758 "pshufd $tmp,$tmp2,0x02\n\t" 4759 "vaddss $dst,$dst,$tmp\n\t" 4760 "pshufd $tmp,$tmp2,0x03\n\t" 4761 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 4762 ins_encode %{ 4763 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4764 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4765 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4766 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4767 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4768 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4769 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4770 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4771 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4772 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4773 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4774 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4775 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4776 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4777 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4778 %} 4779 ins_pipe( pipe_slow ); 4780 %} 4781 4782 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 4783 predicate(UseAVX > 2); 4784 match(Set dst (AddReductionVF dst src2)); 4785 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4786 format %{ "vaddss $dst,$dst,$src2\n\t" 4787 "pshufd $tmp,$src2,0x01\n\t" 4788 "vaddss $dst,$dst,$tmp\n\t" 4789 "pshufd $tmp,$src2,0x02\n\t" 4790 "vaddss $dst,$dst,$tmp\n\t" 4791 "pshufd $tmp,$src2,0x03\n\t" 4792 "vaddss $dst,$dst,$tmp\n\t" 4793 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4794 "vaddss $dst,$dst,$tmp2\n\t" 4795 "pshufd $tmp,$tmp2,0x01\n\t" 4796 "vaddss $dst,$dst,$tmp\n\t" 4797 "pshufd $tmp,$tmp2,0x02\n\t" 4798 "vaddss $dst,$dst,$tmp\n\t" 4799 "pshufd $tmp,$tmp2,0x03\n\t" 4800 "vaddss $dst,$dst,$tmp\n\t" 4801 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4802 "vaddss $dst,$dst,$tmp2\n\t" 4803 "pshufd $tmp,$tmp2,0x01\n\t" 4804 "vaddss $dst,$dst,$tmp\n\t" 4805 "pshufd $tmp,$tmp2,0x02\n\t" 4806 "vaddss $dst,$dst,$tmp\n\t" 4807 "pshufd $tmp,$tmp2,0x03\n\t" 4808 "vaddss $dst,$dst,$tmp\n\t" 4809 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4810 "vaddss $dst,$dst,$tmp2\n\t" 4811 "pshufd $tmp,$tmp2,0x01\n\t" 4812 "vaddss $dst,$dst,$tmp\n\t" 4813 "pshufd 
$tmp,$tmp2,0x02\n\t" 4814 "vaddss $dst,$dst,$tmp\n\t" 4815 "pshufd $tmp,$tmp2,0x03\n\t" 4816 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 4817 ins_encode %{ 4818 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4819 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4820 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4821 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4822 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4823 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4824 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4825 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4826 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4827 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4828 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4829 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4830 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4831 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4832 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4833 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4834 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4835 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4836 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4837 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4838 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4839 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4840 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4841 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4842 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4843 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4844 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4845 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4846 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4847 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4848 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4849 %} 4850 ins_pipe( pipe_slow ); 4851 %} 4852 4853 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4854 predicate(UseSSE >= 1 && UseAVX == 0); 4855 match(Set dst (AddReductionVD dst src2)); 4856 effect(TEMP tmp, TEMP dst); 4857 format %{ "addsd $dst,$src2\n\t" 4858 "pshufd $tmp,$src2,0xE\n\t" 4859 "addsd $dst,$tmp\t! add reduction2D" %} 4860 ins_encode %{ 4861 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 4862 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4863 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 4864 %} 4865 ins_pipe( pipe_slow ); 4866 %} 4867 4868 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4869 predicate(UseAVX > 0); 4870 match(Set dst (AddReductionVD dst src2)); 4871 effect(TEMP tmp, TEMP dst); 4872 format %{ "vaddsd $dst,$dst,$src2\n\t" 4873 "pshufd $tmp,$src2,0xE\n\t" 4874 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 4875 ins_encode %{ 4876 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4877 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4878 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4879 %} 4880 ins_pipe( pipe_slow ); 4881 %} 4882 4883 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 4884 predicate(UseAVX > 0); 4885 match(Set dst (AddReductionVD dst src2)); 4886 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4887 format %{ "vaddsd $dst,$dst,$src2\n\t" 4888 "pshufd $tmp,$src2,0xE\n\t" 4889 "vaddsd $dst,$dst,$tmp\n\t" 4890 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4891 "vaddsd $dst,$dst,$tmp2\n\t" 4892 "pshufd $tmp,$tmp2,0xE\n\t" 4893 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 4894 ins_encode %{ 4895 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4896 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4897 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4898 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4899 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4900 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4901 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4902 %} 4903 ins_pipe( pipe_slow ); 4904 %} 4905 4906 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 4907 predicate(UseAVX > 2); 4908 match(Set dst (AddReductionVD dst src2)); 4909 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4910 format %{ "vaddsd $dst,$dst,$src2\n\t" 4911 "pshufd $tmp,$src2,0xE\n\t" 4912 "vaddsd $dst,$dst,$tmp\n\t" 4913 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4914 "vaddsd $dst,$dst,$tmp2\n\t" 4915 "pshufd $tmp,$tmp2,0xE\n\t" 4916 "vaddsd $dst,$dst,$tmp\n\t" 4917 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4918 "vaddsd $dst,$dst,$tmp2\n\t" 4919 "pshufd $tmp,$tmp2,0xE\n\t" 4920 "vaddsd $dst,$dst,$tmp\n\t" 4921 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4922 "vaddsd $dst,$dst,$tmp2\n\t" 4923 "pshufd $tmp,$tmp2,0xE\n\t" 4924 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 4925 ins_encode %{ 4926 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4927 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4928 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4929 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4930 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4931 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4932 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4933 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4934 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4935 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4936 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4937 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4938 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4939 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4940 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4941 %} 4942 ins_pipe( pipe_slow ); 4943 %} 4944 4945 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4946 predicate(UseSSE > 3 && UseAVX == 0); 4947 match(Set dst (MulReductionVI src1 src2)); 4948 effect(TEMP tmp, TEMP tmp2); 4949 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4950 "pmulld $tmp2,$src2\n\t" 4951 "movd $tmp,$src1\n\t" 4952 "pmulld $tmp2,$tmp\n\t" 4953 "movd $dst,$tmp2\t! 
mul reduction2I" %} 4954 ins_encode %{ 4955 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4956 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4957 __ movdl($tmp$$XMMRegister, $src1$$Register); 4958 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4959 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4960 %} 4961 ins_pipe( pipe_slow ); 4962 %} 4963 4964 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4965 predicate(UseAVX > 0); 4966 match(Set dst (MulReductionVI src1 src2)); 4967 effect(TEMP tmp, TEMP tmp2); 4968 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4969 "vpmulld $tmp,$src2,$tmp2\n\t" 4970 "movd $tmp2,$src1\n\t" 4971 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4972 "movd $dst,$tmp2\t! mul reduction2I" %} 4973 ins_encode %{ 4974 int vector_len = 0; 4975 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4976 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4977 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4978 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4979 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4980 %} 4981 ins_pipe( pipe_slow ); 4982 %} 4983 4984 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4985 predicate(UseSSE > 3 && UseAVX == 0); 4986 match(Set dst (MulReductionVI src1 src2)); 4987 effect(TEMP tmp, TEMP tmp2); 4988 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4989 "pmulld $tmp2,$src2\n\t" 4990 "pshufd $tmp,$tmp2,0x1\n\t" 4991 "pmulld $tmp2,$tmp\n\t" 4992 "movd $tmp,$src1\n\t" 4993 "pmulld $tmp2,$tmp\n\t" 4994 "movd $dst,$tmp2\t! mul reduction4I" %} 4995 ins_encode %{ 4996 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4997 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4998 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 4999 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5000 __ movdl($tmp$$XMMRegister, $src1$$Register); 5001 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5002 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5003 %} 5004 ins_pipe( pipe_slow ); 5005 %} 5006 5007 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5008 predicate(UseAVX > 0); 5009 match(Set dst (MulReductionVI src1 src2)); 5010 effect(TEMP tmp, TEMP tmp2); 5011 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5012 "vpmulld $tmp,$src2,$tmp2\n\t" 5013 "pshufd $tmp2,$tmp,0x1\n\t" 5014 "vpmulld $tmp,$tmp,$tmp2\n\t" 5015 "movd $tmp2,$src1\n\t" 5016 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5017 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5018 ins_encode %{ 5019 int vector_len = 0; 5020 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5021 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5022 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5023 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5024 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5025 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5026 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5027 %} 5028 ins_pipe( pipe_slow ); 5029 %} 5030 5031 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5032 predicate(UseAVX > 0); 5033 match(Set dst (MulReductionVI src1 src2)); 5034 effect(TEMP tmp, TEMP tmp2); 5035 format %{ "vextracti128_high $tmp,$src2\n\t" 5036 "vpmulld $tmp,$tmp,$src2\n\t" 5037 "pshufd $tmp2,$tmp,0xE\n\t" 5038 "vpmulld $tmp,$tmp,$tmp2\n\t" 5039 "pshufd $tmp2,$tmp,0x1\n\t" 5040 "vpmulld $tmp,$tmp,$tmp2\n\t" 5041 "movd $tmp2,$src1\n\t" 5042 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5043 "movd $dst,$tmp2\t! mul reduction8I" %} 5044 ins_encode %{ 5045 int vector_len = 0; 5046 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5047 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5048 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5049 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5050 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5051 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5052 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5053 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5054 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5055 %} 5056 ins_pipe( pipe_slow ); 5057 %} 5058 5059 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5060 predicate(UseAVX > 2); 5061 match(Set dst (MulReductionVI src1 src2)); 5062 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5063 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5064 "vpmulld $tmp3,$tmp3,$src2\n\t" 5065 "vextracti128_high $tmp,$tmp3\n\t" 5066 "vpmulld $tmp,$tmp,$tmp3\n\t" 5067 "pshufd $tmp2,$tmp,0xE\n\t" 5068 "vpmulld $tmp,$tmp,$tmp2\n\t" 5069 "pshufd $tmp2,$tmp,0x1\n\t" 5070 "vpmulld $tmp,$tmp,$tmp2\n\t" 5071 "movd $tmp2,$src1\n\t" 5072 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5073 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5074 ins_encode %{ 5075 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5076 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5077 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5078 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5079 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5080 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5081 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5082 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5083 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5084 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5085 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5086 %} 5087 ins_pipe( pipe_slow ); 5088 %} 5089 5090 #ifdef _LP64 5091 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5092 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5093 match(Set dst (MulReductionVL src1 src2)); 5094 effect(TEMP tmp, TEMP tmp2); 5095 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5096 "vpmullq $tmp,$src2,$tmp2\n\t" 5097 "movdq $tmp2,$src1\n\t" 5098 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5099 "movdq $dst,$tmp2\t! mul reduction2L" %} 5100 ins_encode %{ 5101 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5102 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5103 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5104 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5105 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5106 %} 5107 ins_pipe( pipe_slow ); 5108 %} 5109 5110 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5111 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5112 match(Set dst (MulReductionVL src1 src2)); 5113 effect(TEMP tmp, TEMP tmp2); 5114 format %{ "vextracti128_high $tmp,$src2\n\t" 5115 "vpmullq $tmp2,$tmp,$src2\n\t" 5116 "pshufd $tmp,$tmp2,0xE\n\t" 5117 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5118 "movdq $tmp,$src1\n\t" 5119 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5120 "movdq $dst,$tmp2\t! mul reduction4L" %} 5121 ins_encode %{ 5122 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5123 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5124 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5125 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5126 __ movdq($tmp$$XMMRegister, $src1$$Register); 5127 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5128 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5129 %} 5130 ins_pipe( pipe_slow ); 5131 %} 5132 5133 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5134 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5135 match(Set dst (MulReductionVL src1 src2)); 5136 effect(TEMP tmp, TEMP tmp2); 5137 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5138 "vpmullq $tmp2,$tmp2,$src2\n\t" 5139 "vextracti128_high $tmp,$tmp2\n\t" 5140 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5141 "pshufd $tmp,$tmp2,0xE\n\t" 5142 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5143 "movdq $tmp,$src1\n\t" 5144 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5145 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5146 ins_encode %{ 5147 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5148 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5149 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5150 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5151 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5152 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5153 __ movdq($tmp$$XMMRegister, $src1$$Register); 5154 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5155 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5156 %} 5157 ins_pipe( pipe_slow ); 5158 %} 5159 #endif 5160 5161 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5162 predicate(UseSSE >= 1 && UseAVX == 0); 5163 match(Set dst (MulReductionVF dst src2)); 5164 effect(TEMP dst, TEMP tmp); 5165 format %{ "mulss $dst,$src2\n\t" 5166 "pshufd $tmp,$src2,0x01\n\t" 5167 "mulss $dst,$tmp\t! mul reduction2F" %} 5168 ins_encode %{ 5169 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5170 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5171 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5172 %} 5173 ins_pipe( pipe_slow ); 5174 %} 5175 5176 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5177 predicate(UseAVX > 0); 5178 match(Set dst (MulReductionVF dst src2)); 5179 effect(TEMP tmp, TEMP dst); 5180 format %{ "vmulss $dst,$dst,$src2\n\t" 5181 "pshufd $tmp,$src2,0x01\n\t" 5182 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5183 ins_encode %{ 5184 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5185 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5186 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5187 %} 5188 ins_pipe( pipe_slow ); 5189 %} 5190 5191 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5192 predicate(UseSSE >= 1 && UseAVX == 0); 5193 match(Set dst (MulReductionVF dst src2)); 5194 effect(TEMP dst, TEMP tmp); 5195 format %{ "mulss $dst,$src2\n\t" 5196 "pshufd $tmp,$src2,0x01\n\t" 5197 "mulss $dst,$tmp\n\t" 5198 "pshufd $tmp,$src2,0x02\n\t" 5199 "mulss $dst,$tmp\n\t" 5200 "pshufd $tmp,$src2,0x03\n\t" 5201 "mulss $dst,$tmp\t! mul reduction4F" %} 5202 ins_encode %{ 5203 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5204 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5205 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5206 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5207 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5208 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5209 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5210 %} 5211 ins_pipe( pipe_slow ); 5212 %} 5213 5214 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5215 predicate(UseAVX > 0); 5216 match(Set dst (MulReductionVF dst src2)); 5217 effect(TEMP tmp, TEMP dst); 5218 format %{ "vmulss $dst,$dst,$src2\n\t" 5219 "pshufd $tmp,$src2,0x01\n\t" 5220 "vmulss $dst,$dst,$tmp\n\t" 5221 "pshufd $tmp,$src2,0x02\n\t" 5222 "vmulss $dst,$dst,$tmp\n\t" 5223 "pshufd $tmp,$src2,0x03\n\t" 5224 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5225 ins_encode %{ 5226 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5227 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5228 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5229 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5230 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5231 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5232 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5233 %} 5234 ins_pipe( pipe_slow ); 5235 %} 5236 5237 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5238 predicate(UseAVX > 0); 5239 match(Set dst (MulReductionVF dst src2)); 5240 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5241 format %{ "vmulss $dst,$dst,$src2\n\t" 5242 "pshufd $tmp,$src2,0x01\n\t" 5243 "vmulss $dst,$dst,$tmp\n\t" 5244 "pshufd $tmp,$src2,0x02\n\t" 5245 "vmulss $dst,$dst,$tmp\n\t" 5246 "pshufd $tmp,$src2,0x03\n\t" 5247 "vmulss $dst,$dst,$tmp\n\t" 5248 "vextractf128_high $tmp2,$src2\n\t" 5249 "vmulss $dst,$dst,$tmp2\n\t" 5250 "pshufd $tmp,$tmp2,0x01\n\t" 5251 "vmulss $dst,$dst,$tmp\n\t" 5252 "pshufd $tmp,$tmp2,0x02\n\t" 5253 "vmulss $dst,$dst,$tmp\n\t" 5254 "pshufd $tmp,$tmp2,0x03\n\t" 5255 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5256 ins_encode %{ 5257 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5258 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5259 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5260 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5261 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5262 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5263 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5264 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5265 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5266 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5267 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5268 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5269 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5270 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5271 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5272 %} 5273 ins_pipe( pipe_slow ); 5274 %} 5275 5276 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5277 predicate(UseAVX > 2); 5278 match(Set dst (MulReductionVF dst src2)); 5279 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5280 format %{ "vmulss $dst,$dst,$src2\n\t" 5281 "pshufd $tmp,$src2,0x01\n\t" 5282 "vmulss $dst,$dst,$tmp\n\t" 5283 "pshufd $tmp,$src2,0x02\n\t" 5284 "vmulss $dst,$dst,$tmp\n\t" 5285 "pshufd $tmp,$src2,0x03\n\t" 5286 "vmulss $dst,$dst,$tmp\n\t" 5287 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5288 "vmulss $dst,$dst,$tmp2\n\t" 5289 "pshufd $tmp,$tmp2,0x01\n\t" 5290 "vmulss $dst,$dst,$tmp\n\t" 5291 "pshufd $tmp,$tmp2,0x02\n\t" 5292 "vmulss $dst,$dst,$tmp\n\t" 5293 "pshufd $tmp,$tmp2,0x03\n\t" 5294 "vmulss $dst,$dst,$tmp\n\t" 5295 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5296 "vmulss $dst,$dst,$tmp2\n\t" 5297 "pshufd $tmp,$tmp2,0x01\n\t" 5298 "vmulss $dst,$dst,$tmp\n\t" 5299 "pshufd $tmp,$tmp2,0x02\n\t" 5300 "vmulss $dst,$dst,$tmp\n\t" 5301 "pshufd $tmp,$tmp2,0x03\n\t" 5302 "vmulss $dst,$dst,$tmp\n\t" 5303 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5304 "vmulss $dst,$dst,$tmp2\n\t" 5305 "pshufd $tmp,$tmp2,0x01\n\t" 5306 "vmulss $dst,$dst,$tmp\n\t" 5307 "pshufd 
$tmp,$tmp2,0x02\n\t" 5308 "vmulss $dst,$dst,$tmp\n\t" 5309 "pshufd $tmp,$tmp2,0x03\n\t" 5310 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5311 ins_encode %{ 5312 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5313 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5314 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5315 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5316 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5317 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5318 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5319 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5320 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5321 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5322 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5323 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5324 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5325 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5326 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5327 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5328 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5329 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5330 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5331 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5332 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5333 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5334 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5335 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5336 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5337 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5338 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5339 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5340 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5341 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5342 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5343 %} 5344 ins_pipe( pipe_slow ); 5345 %} 5346 5347 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5348 predicate(UseSSE >= 1 && UseAVX == 0); 5349 match(Set dst (MulReductionVD dst src2)); 5350 effect(TEMP dst, TEMP tmp); 5351 format %{ "mulsd $dst,$src2\n\t" 5352 "pshufd $tmp,$src2,0xE\n\t" 5353 "mulsd $dst,$tmp\t! mul reduction2D" %} 5354 ins_encode %{ 5355 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5356 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5357 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5358 %} 5359 ins_pipe( pipe_slow ); 5360 %} 5361 5362 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5363 predicate(UseAVX > 0); 5364 match(Set dst (MulReductionVD dst src2)); 5365 effect(TEMP tmp, TEMP dst); 5366 format %{ "vmulsd $dst,$dst,$src2\n\t" 5367 "pshufd $tmp,$src2,0xE\n\t" 5368 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %} 5369 ins_encode %{ 5370 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5371 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5372 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5373 %} 5374 ins_pipe( pipe_slow ); 5375 %} 5376 5377 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5378 predicate(UseAVX > 0); 5379 match(Set dst (MulReductionVD dst src2)); 5380 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5381 format %{ "vmulsd $dst,$dst,$src2\n\t" 5382 "pshufd $tmp,$src2,0xE\n\t" 5383 "vmulsd $dst,$dst,$tmp\n\t" 5384 "vextractf128_high $tmp2,$src2\n\t" 5385 "vmulsd $dst,$dst,$tmp2\n\t" 5386 "pshufd $tmp,$tmp2,0xE\n\t" 5387 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5388 ins_encode %{ 5389 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5390 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5391 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5392 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5393 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5394 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5395 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5396 %} 5397 ins_pipe( pipe_slow ); 5398 %} 5399 5400 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5401 predicate(UseAVX > 2); 5402 match(Set dst (MulReductionVD dst src2)); 5403 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5404 format %{ "vmulsd $dst,$dst,$src2\n\t" 5405 "pshufd $tmp,$src2,0xE\n\t" 5406 "vmulsd $dst,$dst,$tmp\n\t" 5407 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5408 "vmulsd $dst,$dst,$tmp2\n\t" 5409 "pshufd $tmp,$tmp2,0xE\n\t" 5410 "vmulsd $dst,$dst,$tmp\n\t" 5411 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5412 "vmulsd $dst,$dst,$tmp2\n\t" 5413 "pshufd $tmp,$tmp2,0xE\n\t" 5414 "vmulsd $dst,$dst,$tmp\n\t" 5415 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5416 "vmulsd $dst,$dst,$tmp2\n\t" 5417 "pshufd $tmp,$tmp2,0xE\n\t" 5418 "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} 5419 ins_encode %{ 5420 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5421 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5422 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5423 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5424 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5425 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5426 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5427 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5428 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5429 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5430 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5431 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5432 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5433 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5434 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5435 %} 5436 ins_pipe( pipe_slow ); 5437 %} 5438 5439 // ====================VECTOR ARITHMETIC======================================= 5440 5441 // --------------------------------- ADD -------------------------------------- 5442
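// Note on the variant naming used by the packed-add rules below (a summary of
// the predicates, not generated code):
//   vaddN*                 - SSE two-operand form, dst = dst + src
//   vaddN*_reg_avx         - VEX three-operand form for AVX-only CPUs (no EVEX)
//   vaddN*_reg_evex        - EVEX form; byte/short elements require AVX512BW
//   vaddN*_*_evex_special  - EVEX CPUs without AVX512BW: byte/short adds keep
//                            the two-operand tree shape (dst is also an input)
// The vector_len argument passed to the assembler selects the operand width:
// 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.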
5443 // Bytes vector add 5444 instruct vadd4B(vecS dst, vecS src) %{ 5445 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5446 match(Set dst (AddVB dst src)); 5447 format %{ "paddb $dst,$src\t! add packed4B" %} 5448 ins_encode %{ 5449 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5450 %} 5451 ins_pipe( pipe_slow ); 5452 %} 5453 5454 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5455 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5456 match(Set dst (AddVB src1 src2)); 5457 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5458 ins_encode %{ 5459 int vector_len = 0; 5460 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5461 %} 5462 ins_pipe( pipe_slow ); 5463 %} 5464 5465 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5466 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5467 match(Set dst (AddVB src1 src2)); 5468 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5469 ins_encode %{ 5470 int vector_len = 0; 5471 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5472 %} 5473 ins_pipe( pipe_slow ); 5474 %} 5475 5476 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5477 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5478 match(Set dst (AddVB dst src2)); 5479 effect(TEMP src1); 5480 format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} 5481 ins_encode %{ 5482 int vector_len = 0; 5483 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5484 %} 5485 ins_pipe( pipe_slow ); 5486 %} 5487 5488 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ 5489 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5490 match(Set dst (AddVB src (LoadVector mem))); 5491 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5492 ins_encode %{ 5493 int vector_len = 0; 5494 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5495 %} 5496 ins_pipe( pipe_slow ); 5497 %} 5498 5499 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ 5500 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5501 match(Set dst (AddVB src (LoadVector mem))); 5502 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5503 ins_encode %{ 5504 int vector_len = 0; 5505 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5506 %} 5507 ins_pipe( pipe_slow ); 5508 %} 5509 5510 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5511 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5512 match(Set dst (AddVB dst (LoadVector mem))); 5513 effect(TEMP src); 5514 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5515 ins_encode %{ 5516 int vector_len = 0; 5517 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5518 %} 5519 ins_pipe( pipe_slow ); 5520 %} 5521 5522 instruct vadd8B(vecD dst, vecD src) %{ 5523 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 5524 match(Set dst (AddVB dst src)); 5525 format %{ "paddb $dst,$src\t! add packed8B" %} 5526 ins_encode %{ 5527 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5528 %} 5529 ins_pipe( pipe_slow ); 5530 %} 5531 5532 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5533 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5534 match(Set dst (AddVB src1 src2)); 5535 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %} 5536 ins_encode %{ 5537 int vector_len = 0; 5538 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5539 %} 5540 ins_pipe( pipe_slow ); 5541 %} 5542 5543 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5544 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5545 match(Set dst (AddVB src1 src2)); 5546 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5547 ins_encode %{ 5548 int vector_len = 0; 5549 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5550 %} 5551 ins_pipe( pipe_slow ); 5552 %} 5553 5554 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5555 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5556 match(Set dst (AddVB dst src2)); 5557 effect(TEMP src1); 5558 format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} 5559 ins_encode %{ 5560 int vector_len = 0; 5561 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5562 %} 5563 ins_pipe( pipe_slow ); 5564 %} 5565 5566 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ 5567 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5568 match(Set dst (AddVB src (LoadVector mem))); 5569 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5570 ins_encode %{ 5571 int vector_len = 0; 5572 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5573 %} 5574 ins_pipe( pipe_slow ); 5575 %} 5576 5577 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ 5578 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5579 match(Set dst (AddVB src (LoadVector mem))); 5580 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5581 ins_encode %{ 5582 int vector_len = 0; 5583 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5584 %} 5585 ins_pipe( pipe_slow ); 5586 %} 5587 5588 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 5589 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5590 match(Set dst (AddVB dst (LoadVector mem))); 5591 effect(TEMP src); 5592 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5593 ins_encode %{ 5594 int vector_len = 0; 5595 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5596 %} 5597 ins_pipe( pipe_slow ); 5598 %} 5599 5600 instruct vadd16B(vecX dst, vecX src) %{ 5601 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 5602 match(Set dst (AddVB dst src)); 5603 format %{ "paddb $dst,$src\t! add packed16B" %} 5604 ins_encode %{ 5605 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5606 %} 5607 ins_pipe( pipe_slow ); 5608 %} 5609 5610 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 5611 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5612 match(Set dst (AddVB src1 src2)); 5613 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5614 ins_encode %{ 5615 int vector_len = 0; 5616 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5617 %} 5618 ins_pipe( pipe_slow ); 5619 %} 5620 5621 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 5622 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5623 match(Set dst (AddVB src1 src2)); 5624 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed16B" %} 5625 ins_encode %{ 5626 int vector_len = 0; 5627 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5628 %} 5629 ins_pipe( pipe_slow ); 5630 %} 5631 5632 instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 5633 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 5634 match(Set dst (AddVB dst src2)); 5635 effect(TEMP src1); 5636 format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %} 5637 ins_encode %{ 5638 int vector_len = 0; 5639 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5640 %} 5641 ins_pipe( pipe_slow ); 5642 %} 5643 5644 instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ 5645 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5646 match(Set dst (AddVB src (LoadVector mem))); 5647 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5648 ins_encode %{ 5649 int vector_len = 0; 5650 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5651 %} 5652 ins_pipe( pipe_slow ); 5653 %} 5654 5655 instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ 5656 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5657 match(Set dst (AddVB src (LoadVector mem))); 5658 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5659 ins_encode %{ 5660 int vector_len = 0; 5661 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5662 %} 5663 ins_pipe( pipe_slow ); 5664 %} 5665 5666 instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 5667 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 5668 match(Set dst (AddVB dst (LoadVector mem))); 5669 effect(TEMP src); 5670 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5671 ins_encode %{ 5672 int vector_len = 0; 5673 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5674 %} 5675 ins_pipe( pipe_slow ); 5676 %} 5677 5678 instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 5679 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5680 match(Set dst (AddVB src1 src2)); 5681 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5682 ins_encode %{ 5683 int vector_len = 1; 5684 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5685 %} 5686 ins_pipe( pipe_slow ); 5687 %} 5688 5689 instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 5690 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5691 match(Set dst (AddVB src1 src2)); 5692 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5693 ins_encode %{ 5694 int vector_len = 1; 5695 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5696 %} 5697 ins_pipe( pipe_slow ); 5698 %} 5699 5700 instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 5701 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 5702 match(Set dst (AddVB dst src2)); 5703 effect(TEMP src1); 5704 format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %} 5705 ins_encode %{ 5706 int vector_len = 1; 5707 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5708 %} 5709 ins_pipe( pipe_slow ); 5710 %} 5711 5712 instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{ 5713 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5714 match(Set dst (AddVB src (LoadVector mem))); 5715 format %{ "vpaddb $dst,$src,$mem\t! 
add packed32B" %} 5716 ins_encode %{ 5717 int vector_len = 1; 5718 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5719 %} 5720 ins_pipe( pipe_slow ); 5721 %} 5722 5723 instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{ 5724 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5725 match(Set dst (AddVB src (LoadVector mem))); 5726 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5727 ins_encode %{ 5728 int vector_len = 1; 5729 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5730 %} 5731 ins_pipe( pipe_slow ); 5732 %} 5733 5734 instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 5735 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 5736 match(Set dst (AddVB dst (LoadVector mem))); 5737 effect(TEMP src); 5738 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5739 ins_encode %{ 5740 int vector_len = 1; 5741 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5742 %} 5743 ins_pipe( pipe_slow ); 5744 %} 5745 5746 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5747 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 5748 match(Set dst (AddVB src1 src2)); 5749 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 5750 ins_encode %{ 5751 int vector_len = 2; 5752 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5753 %} 5754 ins_pipe( pipe_slow ); 5755 %} 5756 5757 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5758 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 5759 match(Set dst (AddVB src (LoadVector mem))); 5760 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5761 ins_encode %{ 5762 int vector_len = 2; 5763 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5764 %} 5765 ins_pipe( pipe_slow ); 5766 %} 5767
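// Note: the 64-byte rules above intentionally have no *_evex_special variant;
// without AVX512BW the matcher caps byte/short vectors below 512 bits, so the
// 64B (and, below, 32S) rules can only be selected when AVX512BW is present.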
5768 // Shorts/Chars vector add 5769 instruct vadd2S(vecS dst, vecS src) %{ 5770 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 5771 match(Set dst (AddVS dst src)); 5772 format %{ "paddw $dst,$src\t! add packed2S" %} 5773 ins_encode %{ 5774 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5775 %} 5776 ins_pipe( pipe_slow ); 5777 %} 5778 5779 instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5780 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5781 match(Set dst (AddVS src1 src2)); 5782 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5783 ins_encode %{ 5784 int vector_len = 0; 5785 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5786 %} 5787 ins_pipe( pipe_slow ); 5788 %} 5789 5790 instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5791 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5792 match(Set dst (AddVS src1 src2)); 5793 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5794 ins_encode %{ 5795 int vector_len = 0; 5796 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5797 %} 5798 ins_pipe( pipe_slow ); 5799 %} 5800 5801 instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5802 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 5803 match(Set dst (AddVS dst src2)); 5804 effect(TEMP src1); 5805 format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %} 5806 ins_encode %{ 5807 int vector_len = 0; 5808 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5809 %} 5810 ins_pipe( pipe_slow ); 5811 %} 5812 5813 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ 5814 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5815 match(Set dst (AddVS src (LoadVector mem))); 5816 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5817 ins_encode %{ 5818 int vector_len = 0; 5819 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5820 %} 5821 ins_pipe( pipe_slow ); 5822 %} 5823 5824 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ 5825 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5826 match(Set dst (AddVS src (LoadVector mem))); 5827 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5828 ins_encode %{ 5829 int vector_len = 0; 5830 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5831 %} 5832 ins_pipe( pipe_slow ); 5833 %} 5834 5835 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5836 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 5837 match(Set dst (AddVS dst (LoadVector mem))); 5838 effect(TEMP src); 5839 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5840 ins_encode %{ 5841 int vector_len = 0; 5842 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5843 %} 5844 ins_pipe( pipe_slow ); 5845 %} 5846 5847 instruct vadd4S(vecD dst, vecD src) %{ 5848 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5849 match(Set dst (AddVS dst src)); 5850 format %{ "paddw $dst,$src\t! add packed4S" %} 5851 ins_encode %{ 5852 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5853 %} 5854 ins_pipe( pipe_slow ); 5855 %} 5856 5857 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5858 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5859 match(Set dst (AddVS src1 src2)); 5860 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5861 ins_encode %{ 5862 int vector_len = 0; 5863 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5864 %} 5865 ins_pipe( pipe_slow ); 5866 %} 5867 5868 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5869 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5870 match(Set dst (AddVS src1 src2)); 5871 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5872 ins_encode %{ 5873 int vector_len = 0; 5874 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5875 %} 5876 ins_pipe( pipe_slow ); 5877 %} 5878 5879 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5880 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5881 match(Set dst (AddVS dst src2)); 5882 effect(TEMP src1); 5883 format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} 5884 ins_encode %{ 5885 int vector_len = 0; 5886 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5887 %} 5888 ins_pipe( pipe_slow ); 5889 %} 5890 5891 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ 5892 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5893 match(Set dst (AddVS src (LoadVector mem))); 5894 format %{ "vpaddw $dst,$src,$mem\t! 
add packed4S" %} 5895 ins_encode %{ 5896 int vector_len = 0; 5897 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5898 %} 5899 ins_pipe( pipe_slow ); 5900 %} 5901 5902 instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{ 5903 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5904 match(Set dst (AddVS src (LoadVector mem))); 5905 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 5906 ins_encode %{ 5907 int vector_len = 0; 5908 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5909 %} 5910 ins_pipe( pipe_slow ); 5911 %} 5912 5913 instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 5914 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5915 match(Set dst (AddVS dst (LoadVector mem))); 5916 effect(TEMP src); 5917 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 5918 ins_encode %{ 5919 int vector_len = 0; 5920 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5921 %} 5922 ins_pipe( pipe_slow ); 5923 %} 5924 5925 instruct vadd8S(vecX dst, vecX src) %{ 5926 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 5927 match(Set dst (AddVS dst src)); 5928 format %{ "paddw $dst,$src\t! add packed8S" %} 5929 ins_encode %{ 5930 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5931 %} 5932 ins_pipe( pipe_slow ); 5933 %} 5934 5935 instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 5936 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5937 match(Set dst (AddVS src1 src2)); 5938 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5939 ins_encode %{ 5940 int vector_len = 0; 5941 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5942 %} 5943 ins_pipe( pipe_slow ); 5944 %} 5945 5946 instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 5947 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5948 match(Set dst (AddVS src1 src2)); 5949 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5950 ins_encode %{ 5951 int vector_len = 0; 5952 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5953 %} 5954 ins_pipe( pipe_slow ); 5955 %} 5956 5957 instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 5958 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5959 match(Set dst (AddVS dst src2)); 5960 effect(TEMP src1); 5961 format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %} 5962 ins_encode %{ 5963 int vector_len = 0; 5964 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5965 %} 5966 ins_pipe( pipe_slow ); 5967 %} 5968 5969 instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{ 5970 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5971 match(Set dst (AddVS src (LoadVector mem))); 5972 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 5973 ins_encode %{ 5974 int vector_len = 0; 5975 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5976 %} 5977 ins_pipe( pipe_slow ); 5978 %} 5979 5980 instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{ 5981 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5982 match(Set dst (AddVS src (LoadVector mem))); 5983 format %{ "vpaddw $dst,$src,$mem\t! 
add packed8S" %} 5984 ins_encode %{ 5985 int vector_len = 0; 5986 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5987 %} 5988 ins_pipe( pipe_slow ); 5989 %} 5990 5991 instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 5992 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5993 match(Set dst (AddVS dst (LoadVector mem))); 5994 effect(TEMP src); 5995 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 5996 ins_encode %{ 5997 int vector_len = 0; 5998 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5999 %} 6000 ins_pipe( pipe_slow ); 6001 %} 6002 6003 instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 6004 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 6005 match(Set dst (AddVS src1 src2)); 6006 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 6007 ins_encode %{ 6008 int vector_len = 1; 6009 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6010 %} 6011 ins_pipe( pipe_slow ); 6012 %} 6013 6014 instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 6015 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6016 match(Set dst (AddVS src1 src2)); 6017 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 6018 ins_encode %{ 6019 int vector_len = 1; 6020 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6021 %} 6022 ins_pipe( pipe_slow ); 6023 %} 6024 6025 instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 6026 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6027 match(Set dst (AddVS dst src2)); 6028 effect(TEMP src1); 6029 format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %} 6030 ins_encode %{ 6031 int vector_len = 1; 6032 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6033 %} 6034 ins_pipe( pipe_slow ); 6035 %} 6036 6037 instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{ 6038 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 6039 match(Set dst (AddVS src (LoadVector mem))); 6040 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6041 ins_encode %{ 6042 int vector_len = 1; 6043 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6044 %} 6045 ins_pipe( pipe_slow ); 6046 %} 6047 6048 instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{ 6049 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6050 match(Set dst (AddVS src (LoadVector mem))); 6051 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6052 ins_encode %{ 6053 int vector_len = 1; 6054 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6055 %} 6056 ins_pipe( pipe_slow ); 6057 %} 6058 6059 instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 6060 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6061 match(Set dst (AddVS dst (LoadVector mem))); 6062 effect(TEMP src); 6063 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6064 ins_encode %{ 6065 int vector_len = 1; 6066 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6067 %} 6068 ins_pipe( pipe_slow ); 6069 %} 6070 6071 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6072 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6073 match(Set dst (AddVS src1 src2)); 6074 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed32S" %} 6075 ins_encode %{ 6076 int vector_len = 2; 6077 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6078 %} 6079 ins_pipe( pipe_slow ); 6080 %} 6081 6082 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 6083 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6084 match(Set dst (AddVS src (LoadVector mem))); 6085 format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} 6086 ins_encode %{ 6087 int vector_len = 2; 6088 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6089 %} 6090 ins_pipe( pipe_slow ); 6091 %} 6092 6093 // Integers vector add 6094 instruct vadd2I(vecD dst, vecD src) %{ 6095 predicate(n->as_Vector()->length() == 2); 6096 match(Set dst (AddVI dst src)); 6097 format %{ "paddd $dst,$src\t! add packed2I" %} 6098 ins_encode %{ 6099 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6100 %} 6101 ins_pipe( pipe_slow ); 6102 %} 6103 6104 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 6105 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6106 match(Set dst (AddVI src1 src2)); 6107 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 6108 ins_encode %{ 6109 int vector_len = 0; 6110 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6111 %} 6112 ins_pipe( pipe_slow ); 6113 %} 6114 6115 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 6116 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6117 match(Set dst (AddVI src (LoadVector mem))); 6118 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 6119 ins_encode %{ 6120 int vector_len = 0; 6121 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6122 %} 6123 ins_pipe( pipe_slow ); 6124 %} 6125 6126 instruct vadd4I(vecX dst, vecX src) %{ 6127 predicate(n->as_Vector()->length() == 4); 6128 match(Set dst (AddVI dst src)); 6129 format %{ "paddd $dst,$src\t! add packed4I" %} 6130 ins_encode %{ 6131 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6132 %} 6133 ins_pipe( pipe_slow ); 6134 %} 6135 6136 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 6137 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6138 match(Set dst (AddVI src1 src2)); 6139 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 6140 ins_encode %{ 6141 int vector_len = 0; 6142 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6143 %} 6144 ins_pipe( pipe_slow ); 6145 %} 6146 6147 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 6148 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6149 match(Set dst (AddVI src (LoadVector mem))); 6150 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 6151 ins_encode %{ 6152 int vector_len = 0; 6153 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6154 %} 6155 ins_pipe( pipe_slow ); 6156 %} 6157 6158 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 6159 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6160 match(Set dst (AddVI src1 src2)); 6161 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 6162 ins_encode %{ 6163 int vector_len = 1; 6164 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6165 %} 6166 ins_pipe( pipe_slow ); 6167 %} 6168 6169 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 6170 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6171 match(Set dst (AddVI src (LoadVector mem))); 6172 format %{ "vpaddd $dst,$src,$mem\t! 
add packed8I" %} 6173 ins_encode %{ 6174 int vector_len = 1; 6175 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6176 %} 6177 ins_pipe( pipe_slow ); 6178 %} 6179 6180 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6181 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6182 match(Set dst (AddVI src1 src2)); 6183 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} 6184 ins_encode %{ 6185 int vector_len = 2; 6186 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6187 %} 6188 ins_pipe( pipe_slow ); 6189 %} 6190 6191 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 6192 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6193 match(Set dst (AddVI src (LoadVector mem))); 6194 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 6195 ins_encode %{ 6196 int vector_len = 2; 6197 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6198 %} 6199 ins_pipe( pipe_slow ); 6200 %} 6201 6202 // Longs vector add 6203 instruct vadd2L(vecX dst, vecX src) %{ 6204 predicate(n->as_Vector()->length() == 2); 6205 match(Set dst (AddVL dst src)); 6206 format %{ "paddq $dst,$src\t! add packed2L" %} 6207 ins_encode %{ 6208 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 6209 %} 6210 ins_pipe( pipe_slow ); 6211 %} 6212 6213 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 6214 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6215 match(Set dst (AddVL src1 src2)); 6216 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 6217 ins_encode %{ 6218 int vector_len = 0; 6219 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6220 %} 6221 ins_pipe( pipe_slow ); 6222 %} 6223 6224 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 6225 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6226 match(Set dst (AddVL src (LoadVector mem))); 6227 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 6228 ins_encode %{ 6229 int vector_len = 0; 6230 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6231 %} 6232 ins_pipe( pipe_slow ); 6233 %} 6234 6235 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 6236 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6237 match(Set dst (AddVL src1 src2)); 6238 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 6239 ins_encode %{ 6240 int vector_len = 1; 6241 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6242 %} 6243 ins_pipe( pipe_slow ); 6244 %} 6245 6246 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 6247 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6248 match(Set dst (AddVL src (LoadVector mem))); 6249 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 6250 ins_encode %{ 6251 int vector_len = 1; 6252 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6253 %} 6254 ins_pipe( pipe_slow ); 6255 %} 6256 6257 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6258 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6259 match(Set dst (AddVL src1 src2)); 6260 format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} 6261 ins_encode %{ 6262 int vector_len = 2; 6263 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6264 %} 6265 ins_pipe( pipe_slow ); 6266 %} 6267 6268 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ 6269 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6270 match(Set dst (AddVL src (LoadVector mem))); 6271 format %{ "vpaddq $dst,$src,$mem\t! 
add packed8L" %} 6272 ins_encode %{ 6273 int vector_len = 2; 6274 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6275 %} 6276 ins_pipe( pipe_slow ); 6277 %} 6278 6279 // Floats vector add 6280 instruct vadd2F(vecD dst, vecD src) %{ 6281 predicate(n->as_Vector()->length() == 2); 6282 match(Set dst (AddVF dst src)); 6283 format %{ "addps $dst,$src\t! add packed2F" %} 6284 ins_encode %{ 6285 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6286 %} 6287 ins_pipe( pipe_slow ); 6288 %} 6289 6290 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 6291 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6292 match(Set dst (AddVF src1 src2)); 6293 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 6294 ins_encode %{ 6295 int vector_len = 0; 6296 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6297 %} 6298 ins_pipe( pipe_slow ); 6299 %} 6300 6301 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 6302 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6303 match(Set dst (AddVF src (LoadVector mem))); 6304 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 6305 ins_encode %{ 6306 int vector_len = 0; 6307 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6308 %} 6309 ins_pipe( pipe_slow ); 6310 %} 6311 6312 instruct vadd4F(vecX dst, vecX src) %{ 6313 predicate(n->as_Vector()->length() == 4); 6314 match(Set dst (AddVF dst src)); 6315 format %{ "addps $dst,$src\t! add packed4F" %} 6316 ins_encode %{ 6317 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6318 %} 6319 ins_pipe( pipe_slow ); 6320 %} 6321 6322 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 6323 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6324 match(Set dst (AddVF src1 src2)); 6325 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 6326 ins_encode %{ 6327 int vector_len = 0; 6328 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6329 %} 6330 ins_pipe( pipe_slow ); 6331 %} 6332 6333 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 6334 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6335 match(Set dst (AddVF src (LoadVector mem))); 6336 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 6337 ins_encode %{ 6338 int vector_len = 0; 6339 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6340 %} 6341 ins_pipe( pipe_slow ); 6342 %} 6343 6344 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 6345 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6346 match(Set dst (AddVF src1 src2)); 6347 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 6348 ins_encode %{ 6349 int vector_len = 1; 6350 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6351 %} 6352 ins_pipe( pipe_slow ); 6353 %} 6354 6355 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 6356 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6357 match(Set dst (AddVF src (LoadVector mem))); 6358 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 6359 ins_encode %{ 6360 int vector_len = 1; 6361 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6362 %} 6363 ins_pipe( pipe_slow ); 6364 %} 6365 6366 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6367 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6368 match(Set dst (AddVF src1 src2)); 6369 format %{ "vaddps $dst,$src1,$src2\t! 
add packed16F" %} 6370 ins_encode %{ 6371 int vector_len = 2; 6372 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6373 %} 6374 ins_pipe( pipe_slow ); 6375 %} 6376 6377 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 6378 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6379 match(Set dst (AddVF src (LoadVector mem))); 6380 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 6381 ins_encode %{ 6382 int vector_len = 2; 6383 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6384 %} 6385 ins_pipe( pipe_slow ); 6386 %} 6387 6388 // Doubles vector add 6389 instruct vadd2D(vecX dst, vecX src) %{ 6390 predicate(n->as_Vector()->length() == 2); 6391 match(Set dst (AddVD dst src)); 6392 format %{ "addpd $dst,$src\t! add packed2D" %} 6393 ins_encode %{ 6394 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 6395 %} 6396 ins_pipe( pipe_slow ); 6397 %} 6398 6399 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 6400 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6401 match(Set dst (AddVD src1 src2)); 6402 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 6403 ins_encode %{ 6404 int vector_len = 0; 6405 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6406 %} 6407 ins_pipe( pipe_slow ); 6408 %} 6409 6410 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 6411 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6412 match(Set dst (AddVD src (LoadVector mem))); 6413 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 6414 ins_encode %{ 6415 int vector_len = 0; 6416 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6417 %} 6418 ins_pipe( pipe_slow ); 6419 %} 6420 6421 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 6422 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6423 match(Set dst (AddVD src1 src2)); 6424 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 6425 ins_encode %{ 6426 int vector_len = 1; 6427 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6428 %} 6429 ins_pipe( pipe_slow ); 6430 %} 6431 6432 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 6433 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6434 match(Set dst (AddVD src (LoadVector mem))); 6435 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 6436 ins_encode %{ 6437 int vector_len = 1; 6438 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6439 %} 6440 ins_pipe( pipe_slow ); 6441 %} 6442 6443 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6444 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6445 match(Set dst (AddVD src1 src2)); 6446 format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} 6447 ins_encode %{ 6448 int vector_len = 2; 6449 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6450 %} 6451 ins_pipe( pipe_slow ); 6452 %} 6453 6454 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 6455 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6456 match(Set dst (AddVD src (LoadVector mem))); 6457 format %{ "vaddpd $dst,$src,$mem\t! 
add packed8D" %} 6458 ins_encode %{ 6459 int vector_len = 2; 6460 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6461 %} 6462 ins_pipe( pipe_slow ); 6463 %} 6464 6465 // --------------------------------- SUB -------------------------------------- 6466 6467 // Bytes vector sub 6468 instruct vsub4B(vecS dst, vecS src) %{ 6469 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6470 match(Set dst (SubVB dst src)); 6471 format %{ "psubb $dst,$src\t! sub packed4B" %} 6472 ins_encode %{ 6473 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6474 %} 6475 ins_pipe( pipe_slow ); 6476 %} 6477 6478 instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 6479 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6480 match(Set dst (SubVB src1 src2)); 6481 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6482 ins_encode %{ 6483 int vector_len = 0; 6484 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6485 %} 6486 ins_pipe( pipe_slow ); 6487 %} 6488 6489 instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 6490 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6491 match(Set dst (SubVB src1 src2)); 6492 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6493 ins_encode %{ 6494 int vector_len = 0; 6495 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6496 %} 6497 ins_pipe( pipe_slow ); 6498 %} 6499 6500 instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{ 6501 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6502 match(Set dst (SubVB dst src2)); 6503 effect(TEMP src1); 6504 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6505 ins_encode %{ 6506 int vector_len = 0; 6507 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6508 %} 6509 ins_pipe( pipe_slow ); 6510 %} 6511 6512 instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{ 6513 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6514 match(Set dst (SubVB src (LoadVector mem))); 6515 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6516 ins_encode %{ 6517 int vector_len = 0; 6518 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6519 %} 6520 ins_pipe( pipe_slow ); 6521 %} 6522 6523 instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{ 6524 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6525 match(Set dst (SubVB src (LoadVector mem))); 6526 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6527 ins_encode %{ 6528 int vector_len = 0; 6529 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6530 %} 6531 ins_pipe( pipe_slow ); 6532 %} 6533 6534 instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 6535 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6536 match(Set dst (SubVB dst (LoadVector mem))); 6537 effect(TEMP src); 6538 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6539 ins_encode %{ 6540 int vector_len = 0; 6541 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6542 %} 6543 ins_pipe( pipe_slow ); 6544 %} 6545 6546 instruct vsub8B(vecD dst, vecD src) %{ 6547 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6548 match(Set dst (SubVB dst src)); 6549 format %{ "psubb $dst,$src\t! 
sub packed8B" %} 6550 ins_encode %{ 6551 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6552 %} 6553 ins_pipe( pipe_slow ); 6554 %} 6555 6556 instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 6557 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6558 match(Set dst (SubVB src1 src2)); 6559 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6560 ins_encode %{ 6561 int vector_len = 0; 6562 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6563 %} 6564 ins_pipe( pipe_slow ); 6565 %} 6566 6567 instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 6568 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6569 match(Set dst (SubVB src1 src2)); 6570 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6571 ins_encode %{ 6572 int vector_len = 0; 6573 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6574 %} 6575 ins_pipe( pipe_slow ); 6576 %} 6577 6578 instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 6579 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6580 match(Set dst (SubVB dst src2)); 6581 effect(TEMP src1); 6582 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6583 ins_encode %{ 6584 int vector_len = 0; 6585 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6586 %} 6587 ins_pipe( pipe_slow ); 6588 %} 6589 6590 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ 6591 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6592 match(Set dst (SubVB src (LoadVector mem))); 6593 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6594 ins_encode %{ 6595 int vector_len = 0; 6596 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6597 %} 6598 ins_pipe( pipe_slow ); 6599 %} 6600 6601 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ 6602 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6603 match(Set dst (SubVB src (LoadVector mem))); 6604 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6605 ins_encode %{ 6606 int vector_len = 0; 6607 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6608 %} 6609 ins_pipe( pipe_slow ); 6610 %} 6611 6612 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 6613 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6614 match(Set dst (SubVB dst (LoadVector mem))); 6615 effect(TEMP src); 6616 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6617 ins_encode %{ 6618 int vector_len = 0; 6619 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6620 %} 6621 ins_pipe( pipe_slow ); 6622 %} 6623 6624 instruct vsub16B(vecX dst, vecX src) %{ 6625 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 6626 match(Set dst (SubVB dst src)); 6627 format %{ "psubb $dst,$src\t! sub packed16B" %} 6628 ins_encode %{ 6629 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6630 %} 6631 ins_pipe( pipe_slow ); 6632 %} 6633 6634 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 6635 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 6636 match(Set dst (SubVB src1 src2)); 6637 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed16B" %} 6638 ins_encode %{ 6639 int vector_len = 0; 6640 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6641 %} 6642 ins_pipe( pipe_slow ); 6643 %} 6644 6645 instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 6646 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6647 match(Set dst (SubVB src1 src2)); 6648 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6649 ins_encode %{ 6650 int vector_len = 0; 6651 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6652 %} 6653 ins_pipe( pipe_slow ); 6654 %} 6655 6656 instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 6657 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6658 match(Set dst (SubVB dst src2)); 6659 effect(TEMP src1); 6660 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6661 ins_encode %{ 6662 int vector_len = 0; 6663 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6664 %} 6665 ins_pipe( pipe_slow ); 6666 %} 6667 6668 instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ 6669 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 6670 match(Set dst (SubVB src (LoadVector mem))); 6671 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6672 ins_encode %{ 6673 int vector_len = 0; 6674 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6675 %} 6676 ins_pipe( pipe_slow ); 6677 %} 6678 6679 instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ 6680 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6681 match(Set dst (SubVB src (LoadVector mem))); 6682 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6683 ins_encode %{ 6684 int vector_len = 0; 6685 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6686 %} 6687 ins_pipe( pipe_slow ); 6688 %} 6689 6690 instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 6691 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6692 match(Set dst (SubVB dst (LoadVector mem))); 6693 effect(TEMP src); 6694 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6695 ins_encode %{ 6696 int vector_len = 0; 6697 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6698 %} 6699 ins_pipe( pipe_slow ); 6700 %} 6701 6702 instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 6703 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 6704 match(Set dst (SubVB src1 src2)); 6705 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6706 ins_encode %{ 6707 int vector_len = 1; 6708 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6709 %} 6710 ins_pipe( pipe_slow ); 6711 %} 6712 6713 instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 6714 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6715 match(Set dst (SubVB src1 src2)); 6716 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6717 ins_encode %{ 6718 int vector_len = 1; 6719 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6720 %} 6721 ins_pipe( pipe_slow ); 6722 %} 6723 6724 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 6725 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 6726 match(Set dst (SubVB dst src2)); 6727 effect(TEMP src1); 6728 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed32B" %} 6729 ins_encode %{ 6730 int vector_len = 1; 6731 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6732 %} 6733 ins_pipe( pipe_slow ); 6734 %} 6735 6736 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ 6737 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 6738 match(Set dst (SubVB src (LoadVector mem))); 6739 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6740 ins_encode %{ 6741 int vector_len = 1; 6742 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6743 %} 6744 ins_pipe( pipe_slow ); 6745 %} 6746 6747 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ 6748 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6749 match(Set dst (SubVB src (LoadVector mem))); 6750 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6751 ins_encode %{ 6752 int vector_len = 1; 6753 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6754 %} 6755 ins_pipe( pipe_slow ); 6756 %} 6757 6758 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 6759 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 6760 match(Set dst (SubVB dst (LoadVector mem))); 6761 effect(TEMP src); 6762 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6763 ins_encode %{ 6764 int vector_len = 1; 6765 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6766 %} 6767 ins_pipe( pipe_slow ); 6768 %} 6769 6770 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6771 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6772 match(Set dst (SubVB src1 src2)); 6773 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 6774 ins_encode %{ 6775 int vector_len = 2; 6776 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6777 %} 6778 ins_pipe( pipe_slow ); 6779 %} 6780 6781 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 6782 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6783 match(Set dst (SubVB src (LoadVector mem))); 6784 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 6785 ins_encode %{ 6786 int vector_len = 2; 6787 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6788 %} 6789 ins_pipe( pipe_slow ); 6790 %} 6791 6792 // Shorts/Chars vector sub 6793 instruct vsub2S(vecS dst, vecS src) %{ 6794 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6795 match(Set dst (SubVS dst src)); 6796 format %{ "psubw $dst,$src\t! sub packed2S" %} 6797 ins_encode %{ 6798 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6799 %} 6800 ins_pipe( pipe_slow ); 6801 %} 6802 6803 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 6804 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 6805 match(Set dst (SubVS src1 src2)); 6806 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6807 ins_encode %{ 6808 int vector_len = 0; 6809 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6810 %} 6811 ins_pipe( pipe_slow ); 6812 %} 6813 6814 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 6815 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 6816 match(Set dst (SubVS src1 src2)); 6817 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed2S" %} 6818 ins_encode %{ 6819 int vector_len = 0; 6820 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6821 %} 6822 ins_pipe( pipe_slow ); 6823 %} 6824 6825 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 6826 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 6827 match(Set dst (SubVS dst src2)); 6828 effect(TEMP src1); 6829 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6830 ins_encode %{ 6831 int vector_len = 0; 6832 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6833 %} 6834 ins_pipe( pipe_slow ); 6835 %} 6836 6837 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 6838 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 6839 match(Set dst (SubVS src (LoadVector mem))); 6840 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6841 ins_encode %{ 6842 int vector_len = 0; 6843 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6844 %} 6845 ins_pipe( pipe_slow ); 6846 %} 6847 6848 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 6849 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 6850 match(Set dst (SubVS src (LoadVector mem))); 6851 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6852 ins_encode %{ 6853 int vector_len = 0; 6854 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6855 %} 6856 ins_pipe( pipe_slow ); 6857 %} 6858 6859 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 6860 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 6861 match(Set dst (SubVS dst (LoadVector mem))); 6862 effect(TEMP src); 6863 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6864 ins_encode %{ 6865 int vector_len = 0; 6866 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6867 %} 6868 ins_pipe( pipe_slow ); 6869 %} 6870 6871 instruct vsub4S(vecD dst, vecD src) %{ 6872 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6873 match(Set dst (SubVS dst src)); 6874 format %{ "psubw $dst,$src\t! sub packed4S" %} 6875 ins_encode %{ 6876 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6877 %} 6878 ins_pipe( pipe_slow ); 6879 %} 6880 6881 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 6882 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6883 match(Set dst (SubVS src1 src2)); 6884 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 6885 ins_encode %{ 6886 int vector_len = 0; 6887 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6888 %} 6889 ins_pipe( pipe_slow ); 6890 %} 6891 6892 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 6893 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6894 match(Set dst (SubVS src1 src2)); 6895 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 6896 ins_encode %{ 6897 int vector_len = 0; 6898 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6899 %} 6900 ins_pipe( pipe_slow ); 6901 %} 6902 6903 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 6904 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6905 match(Set dst (SubVS dst src2)); 6906 effect(TEMP src1); 6907 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 6908 ins_encode %{ 6909 int vector_len = 0; 6910 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6911 %} 6912 ins_pipe( pipe_slow ); 6913 %} 6914 6915 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ 6916 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6917 match(Set dst (SubVS src (LoadVector mem))); 6918 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6919 ins_encode %{ 6920 int vector_len = 0; 6921 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6922 %} 6923 ins_pipe( pipe_slow ); 6924 %} 6925 6926 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ 6927 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6928 match(Set dst (SubVS src (LoadVector mem))); 6929 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6930 ins_encode %{ 6931 int vector_len = 0; 6932 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6933 %} 6934 ins_pipe( pipe_slow ); 6935 %} 6936 6937 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 6938 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6939 match(Set dst (SubVS dst (LoadVector mem))); 6940 effect(TEMP src); 6941 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6942 ins_encode %{ 6943 int vector_len = 0; 6944 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6945 %} 6946 ins_pipe( pipe_slow ); 6947 %} 6948 6949 instruct vsub8S(vecX dst, vecX src) %{ 6950 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6951 match(Set dst (SubVS dst src)); 6952 format %{ "psubw $dst,$src\t! sub packed8S" %} 6953 ins_encode %{ 6954 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6955 %} 6956 ins_pipe( pipe_slow ); 6957 %} 6958 6959 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 6960 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6961 match(Set dst (SubVS src1 src2)); 6962 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6963 ins_encode %{ 6964 int vector_len = 0; 6965 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6966 %} 6967 ins_pipe( pipe_slow ); 6968 %} 6969 6970 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 6971 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6972 match(Set dst (SubVS src1 src2)); 6973 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6974 ins_encode %{ 6975 int vector_len = 0; 6976 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6977 %} 6978 ins_pipe( pipe_slow ); 6979 %} 6980 6981 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 6982 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6983 match(Set dst (SubVS dst src2)); 6984 effect(TEMP src1); 6985 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6986 ins_encode %{ 6987 int vector_len = 0; 6988 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6989 %} 6990 ins_pipe( pipe_slow ); 6991 %} 6992 6993 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ 6994 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6995 match(Set dst (SubVS src (LoadVector mem))); 6996 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} 6997 ins_encode %{ 6998 int vector_len = 0; 6999 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7000 %} 7001 ins_pipe( pipe_slow ); 7002 %} 7003 7004 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7005 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7006 match(Set dst (SubVS src (LoadVector mem))); 7007 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7008 ins_encode %{ 7009 int vector_len = 0; 7010 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7011 %} 7012 ins_pipe( pipe_slow ); 7013 %} 7014 7015 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7016 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7017 match(Set dst (SubVS dst (LoadVector mem))); 7018 effect(TEMP src); 7019 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7020 ins_encode %{ 7021 int vector_len = 0; 7022 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7023 %} 7024 ins_pipe( pipe_slow ); 7025 %} 7026 7027 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7028 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7029 match(Set dst (SubVS src1 src2)); 7030 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7031 ins_encode %{ 7032 int vector_len = 1; 7033 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7034 %} 7035 ins_pipe( pipe_slow ); 7036 %} 7037 7038 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7039 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7040 match(Set dst (SubVS src1 src2)); 7041 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7042 ins_encode %{ 7043 int vector_len = 1; 7044 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7045 %} 7046 ins_pipe( pipe_slow ); 7047 %} 7048 7049 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7050 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7051 match(Set dst (SubVS dst src2)); 7052 effect(TEMP src1); 7053 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7054 ins_encode %{ 7055 int vector_len = 1; 7056 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7057 %} 7058 ins_pipe( pipe_slow ); 7059 %} 7060 7061 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7062 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7063 match(Set dst (SubVS src (LoadVector mem))); 7064 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7065 ins_encode %{ 7066 int vector_len = 1; 7067 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7068 %} 7069 ins_pipe( pipe_slow ); 7070 %} 7071 7072 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7073 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7074 match(Set dst (SubVS src (LoadVector mem))); 7075 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7076 ins_encode %{ 7077 int vector_len = 1; 7078 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7079 %} 7080 ins_pipe( pipe_slow ); 7081 %} 7082 7083 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7084 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7085 match(Set dst (SubVS dst (LoadVector mem))); 7086 effect(TEMP src); 7087 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 7088 ins_encode %{ 7089 int vector_len = 1; 7090 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7091 %} 7092 ins_pipe( pipe_slow ); 7093 %} 7094 7095 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7096 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7097 match(Set dst (SubVS src1 src2)); 7098 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 7099 ins_encode %{ 7100 int vector_len = 2; 7101 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7102 %} 7103 ins_pipe( pipe_slow ); 7104 %} 7105 7106 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 7107 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7108 match(Set dst (SubVS src (LoadVector mem))); 7109 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 7110 ins_encode %{ 7111 int vector_len = 2; 7112 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7113 %} 7114 ins_pipe( pipe_slow ); 7115 %} 7116 7117 // Integers vector sub 7118 instruct vsub2I(vecD dst, vecD src) %{ 7119 predicate(n->as_Vector()->length() == 2); 7120 match(Set dst (SubVI dst src)); 7121 format %{ "psubd $dst,$src\t! sub packed2I" %} 7122 ins_encode %{ 7123 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7124 %} 7125 ins_pipe( pipe_slow ); 7126 %} 7127 7128 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 7129 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7130 match(Set dst (SubVI src1 src2)); 7131 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 7132 ins_encode %{ 7133 int vector_len = 0; 7134 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7135 %} 7136 ins_pipe( pipe_slow ); 7137 %} 7138 7139 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 7140 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7141 match(Set dst (SubVI src (LoadVector mem))); 7142 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 7143 ins_encode %{ 7144 int vector_len = 0; 7145 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7146 %} 7147 ins_pipe( pipe_slow ); 7148 %} 7149 7150 instruct vsub4I(vecX dst, vecX src) %{ 7151 predicate(n->as_Vector()->length() == 4); 7152 match(Set dst (SubVI dst src)); 7153 format %{ "psubd $dst,$src\t! sub packed4I" %} 7154 ins_encode %{ 7155 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7156 %} 7157 ins_pipe( pipe_slow ); 7158 %} 7159 7160 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 7161 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7162 match(Set dst (SubVI src1 src2)); 7163 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 7164 ins_encode %{ 7165 int vector_len = 0; 7166 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7167 %} 7168 ins_pipe( pipe_slow ); 7169 %} 7170 7171 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 7172 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7173 match(Set dst (SubVI src (LoadVector mem))); 7174 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 7175 ins_encode %{ 7176 int vector_len = 0; 7177 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7178 %} 7179 ins_pipe( pipe_slow ); 7180 %} 7181 7182 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 7183 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7184 match(Set dst (SubVI src1 src2)); 7185 format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed8I" %} 7186 ins_encode %{ 7187 int vector_len = 1; 7188 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7189 %} 7190 ins_pipe( pipe_slow ); 7191 %} 7192 7193 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 7194 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7195 match(Set dst (SubVI src (LoadVector mem))); 7196 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 7197 ins_encode %{ 7198 int vector_len = 1; 7199 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7200 %} 7201 ins_pipe( pipe_slow ); 7202 %} 7203 7204 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7205 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7206 match(Set dst (SubVI src1 src2)); 7207 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 7208 ins_encode %{ 7209 int vector_len = 2; 7210 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7211 %} 7212 ins_pipe( pipe_slow ); 7213 %} 7214 7215 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 7216 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7217 match(Set dst (SubVI src (LoadVector mem))); 7218 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 7219 ins_encode %{ 7220 int vector_len = 2; 7221 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7222 %} 7223 ins_pipe( pipe_slow ); 7224 %} 7225 7226 // Longs vector sub 7227 instruct vsub2L(vecX dst, vecX src) %{ 7228 predicate(n->as_Vector()->length() == 2); 7229 match(Set dst (SubVL dst src)); 7230 format %{ "psubq $dst,$src\t! sub packed2L" %} 7231 ins_encode %{ 7232 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 7233 %} 7234 ins_pipe( pipe_slow ); 7235 %} 7236 7237 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 7238 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7239 match(Set dst (SubVL src1 src2)); 7240 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 7241 ins_encode %{ 7242 int vector_len = 0; 7243 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7244 %} 7245 ins_pipe( pipe_slow ); 7246 %} 7247 7248 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 7249 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7250 match(Set dst (SubVL src (LoadVector mem))); 7251 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 7252 ins_encode %{ 7253 int vector_len = 0; 7254 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7255 %} 7256 ins_pipe( pipe_slow ); 7257 %} 7258 7259 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 7260 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7261 match(Set dst (SubVL src1 src2)); 7262 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 7263 ins_encode %{ 7264 int vector_len = 1; 7265 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7266 %} 7267 ins_pipe( pipe_slow ); 7268 %} 7269 7270 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 7271 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7272 match(Set dst (SubVL src (LoadVector mem))); 7273 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 7274 ins_encode %{ 7275 int vector_len = 1; 7276 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7277 %} 7278 ins_pipe( pipe_slow ); 7279 %} 7280 7281 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7282 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7283 match(Set dst (SubVL src1 src2)); 7284 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed8L" %} 7285 ins_encode %{ 7286 int vector_len = 2; 7287 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7288 %} 7289 ins_pipe( pipe_slow ); 7290 %} 7291 7292 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 7293 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7294 match(Set dst (SubVL src (LoadVector mem))); 7295 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 7296 ins_encode %{ 7297 int vector_len = 2; 7298 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7299 %} 7300 ins_pipe( pipe_slow ); 7301 %} 7302 7303 // Floats vector sub 7304 instruct vsub2F(vecD dst, vecD src) %{ 7305 predicate(n->as_Vector()->length() == 2); 7306 match(Set dst (SubVF dst src)); 7307 format %{ "subps $dst,$src\t! sub packed2F" %} 7308 ins_encode %{ 7309 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7310 %} 7311 ins_pipe( pipe_slow ); 7312 %} 7313 7314 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 7315 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7316 match(Set dst (SubVF src1 src2)); 7317 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 7318 ins_encode %{ 7319 int vector_len = 0; 7320 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7321 %} 7322 ins_pipe( pipe_slow ); 7323 %} 7324 7325 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 7326 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7327 match(Set dst (SubVF src (LoadVector mem))); 7328 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 7329 ins_encode %{ 7330 int vector_len = 0; 7331 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7332 %} 7333 ins_pipe( pipe_slow ); 7334 %} 7335 7336 instruct vsub4F(vecX dst, vecX src) %{ 7337 predicate(n->as_Vector()->length() == 4); 7338 match(Set dst (SubVF dst src)); 7339 format %{ "subps $dst,$src\t! sub packed4F" %} 7340 ins_encode %{ 7341 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7342 %} 7343 ins_pipe( pipe_slow ); 7344 %} 7345 7346 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 7347 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7348 match(Set dst (SubVF src1 src2)); 7349 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 7350 ins_encode %{ 7351 int vector_len = 0; 7352 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7353 %} 7354 ins_pipe( pipe_slow ); 7355 %} 7356 7357 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7358 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7359 match(Set dst (SubVF src (LoadVector mem))); 7360 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7361 ins_encode %{ 7362 int vector_len = 0; 7363 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7364 %} 7365 ins_pipe( pipe_slow ); 7366 %} 7367 7368 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7369 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7370 match(Set dst (SubVF src1 src2)); 7371 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7372 ins_encode %{ 7373 int vector_len = 1; 7374 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7375 %} 7376 ins_pipe( pipe_slow ); 7377 %} 7378 7379 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7380 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7381 match(Set dst (SubVF src (LoadVector mem))); 7382 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %} 7383 ins_encode %{ 7384 int vector_len = 1; 7385 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7386 %} 7387 ins_pipe( pipe_slow ); 7388 %} 7389 7390 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7391 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7392 match(Set dst (SubVF src1 src2)); 7393 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 7394 ins_encode %{ 7395 int vector_len = 2; 7396 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7397 %} 7398 ins_pipe( pipe_slow ); 7399 %} 7400 7401 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 7402 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7403 match(Set dst (SubVF src (LoadVector mem))); 7404 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 7405 ins_encode %{ 7406 int vector_len = 2; 7407 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7408 %} 7409 ins_pipe( pipe_slow ); 7410 %} 7411 7412 // Doubles vector sub 7413 instruct vsub2D(vecX dst, vecX src) %{ 7414 predicate(n->as_Vector()->length() == 2); 7415 match(Set dst (SubVD dst src)); 7416 format %{ "subpd $dst,$src\t! sub packed2D" %} 7417 ins_encode %{ 7418 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 7419 %} 7420 ins_pipe( pipe_slow ); 7421 %} 7422 7423 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 7424 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7425 match(Set dst (SubVD src1 src2)); 7426 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 7427 ins_encode %{ 7428 int vector_len = 0; 7429 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7430 %} 7431 ins_pipe( pipe_slow ); 7432 %} 7433 7434 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 7435 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7436 match(Set dst (SubVD src (LoadVector mem))); 7437 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 7438 ins_encode %{ 7439 int vector_len = 0; 7440 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7441 %} 7442 ins_pipe( pipe_slow ); 7443 %} 7444 7445 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7446 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7447 match(Set dst (SubVD src1 src2)); 7448 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7449 ins_encode %{ 7450 int vector_len = 1; 7451 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7452 %} 7453 ins_pipe( pipe_slow ); 7454 %} 7455 7456 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7457 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7458 match(Set dst (SubVD src (LoadVector mem))); 7459 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7460 ins_encode %{ 7461 int vector_len = 1; 7462 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7463 %} 7464 ins_pipe( pipe_slow ); 7465 %} 7466 7467 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7468 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7469 match(Set dst (SubVD src1 src2)); 7470 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7471 ins_encode %{ 7472 int vector_len = 2; 7473 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7474 %} 7475 ins_pipe( pipe_slow ); 7476 %} 7477 7478 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7479 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7480 match(Set dst (SubVD src (LoadVector mem))); 7481 format %{ "vsubpd $dst,$src,$mem\t! 
// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

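// Each MulVS size comes in three flavors, selected by predicate:
//   *_avx           - supports_avxonly():    AVX1/AVX2 without AVX-512
//   *_evex          - supports_avx512bw():   AVX-512 with the BW extension
//   *_evex_special  - supports_avx512nobw(): AVX-512 without BW; the rule
//                     falls back to the destructive form (dst doubles as the
//                     first source) and marks the spare operand TEMP.
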
instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

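// 512-bit operations on 16-bit elements require AVX512BW, so the packed32S
// rules above exist only in an avx512bw() flavor; there is no avx512nobw()
// fallback at this width.
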
// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

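// pmulld is an SSE4.1 instruction, hence the UseSSE > 3 predicate on the
// non-AVX rules above. The packed long products below have no SSE/AVX2
// equivalent at all: vpmullq exists only with AVX512DQ, which is why every
// MulVL rule also tests VM_Version::supports_avx512dq().
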
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

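// The plain SSE rules in this section are destructive (dst is both the first
// input and the result, e.g. "match(Set dst (MulVF dst src))"), while the
// vex-encoded rules use the non-destructive three-operand form
// (dst, src1, src2) and so can also take one operand from memory.
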
// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

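// CMoveV is implemented as a compare followed by a variable blend: cmpps/cmppd
// write an all-ones or all-zeros mask into each lane of $dst, and
// blendvps/blendvpd then pick each lane from $src2 where the mask is set and
// from $src1 where it is clear, leaving the selected values in $dst.
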
instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

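// There are no DivVI/DivVL rules: x86 SIMD provides no packed integer divide
// instruction, so only the floating point divisions above are vectorized.
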
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

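// For example, a superword loop over "a[i] << s" and "b[i] >> s" with the same
// loop-invariant count s only needs one movd through the rule above; the
// resulting XMM register can feed both left and right shift instructions,
// since the hardware reads the count the same way in either direction.
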
// --------------------------------- Sqrt -------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

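// SqrtVF/SqrtVD are matched only when UseAVX > 0: the rules above rely on the
// non-destructive vsqrtps/vsqrtpd encodings (which also allow a memory
// source), and no plain-SSE vector sqrt rule is provided here.
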
// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

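// Every shift size accepts its count either as a vecS register (loaded by the
// shift-count rule above, e.g. psllw xmm,xmm) or as an 8-bit immediate taken
// from (int)$shift$$constant (e.g. psllw xmm,imm8); the *_imm rules cover the
// constant-count case.
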
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

left shift packed16I" %} 9036 ins_encode %{ 9037 int vector_len = 2; 9038 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9039 %} 9040 ins_pipe( pipe_slow ); 9041 %} 9042 9043 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9044 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9045 match(Set dst (LShiftVI src shift)); 9046 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 9047 ins_encode %{ 9048 int vector_len = 2; 9049 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9050 %} 9051 ins_pipe( pipe_slow ); 9052 %} 9053 9054 // Longs vector left shift 9055 instruct vsll2L(vecX dst, vecS shift) %{ 9056 predicate(n->as_Vector()->length() == 2); 9057 match(Set dst (LShiftVL dst shift)); 9058 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9059 ins_encode %{ 9060 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 9061 %} 9062 ins_pipe( pipe_slow ); 9063 %} 9064 9065 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 9066 predicate(n->as_Vector()->length() == 2); 9067 match(Set dst (LShiftVL dst shift)); 9068 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9069 ins_encode %{ 9070 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 9071 %} 9072 ins_pipe( pipe_slow ); 9073 %} 9074 9075 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 9076 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9077 match(Set dst (LShiftVL src shift)); 9078 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9079 ins_encode %{ 9080 int vector_len = 0; 9081 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9082 %} 9083 ins_pipe( pipe_slow ); 9084 %} 9085 9086 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9087 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9088 match(Set dst (LShiftVL src shift)); 9089 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9090 ins_encode %{ 9091 int vector_len = 0; 9092 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9093 %} 9094 ins_pipe( pipe_slow ); 9095 %} 9096 9097 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 9098 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9099 match(Set dst (LShiftVL src shift)); 9100 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9101 ins_encode %{ 9102 int vector_len = 1; 9103 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9104 %} 9105 ins_pipe( pipe_slow ); 9106 %} 9107 9108 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9109 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9110 match(Set dst (LShiftVL src shift)); 9111 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9112 ins_encode %{ 9113 int vector_len = 1; 9114 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9115 %} 9116 ins_pipe( pipe_slow ); 9117 %} 9118 9119 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9120 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9121 match(Set dst (LShiftVL src shift)); 9122 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed8L" %}
9123 ins_encode %{
9124 int vector_len = 2;
9125 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9126 %}
9127 ins_pipe( pipe_slow );
9128 %}
9129
9130 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9131 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
9132 match(Set dst (LShiftVL src shift));
9133 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
9134 ins_encode %{
9135 int vector_len = 2;
9136 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9137 %}
9138 ins_pipe( pipe_slow );
9139 %}
9140
9141 // ----------------------- LogicalRightShift -----------------------------------
9142
9143 // Shorts vector logical right shift produces an incorrect Java result
9144 // for negative data, because Java code converts a short value into an int
9145 // with sign extension before the shift. But char vectors are fine, since
9146 // chars are unsigned values.
9147
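// For illustration, a sketch of the Java semantics at issue (an assumed
// example, not code from this file). Consider the vectorizable loop
//
//   short[] a, b;
//   int k = 2;
//   for (int i = 0; i < a.length; i++) {
//     a[i] = (short)(b[i] >>> k);   // short is promoted to int first
//   }
//
// With b[i] == (short)-1 the scalar result is
// (short)(0xFFFFFFFF >>> 2) == (short)0x3FFFFFFF == -1, whereas a packed
// 16-bit psrlw would compute 0xFFFF >>> 2 == 0x3FFF == 16383. For char
// data the promotion to int is a zero extension, so (char)(c >>> 2) and
// psrlw agree, which is why the rules below remain usable for chars.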
9148 instruct vsrl2S(vecS dst, vecS shift) %{
9149 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
9150 match(Set dst (URShiftVS dst shift));
9151 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
9152 ins_encode %{
9153 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
9154 %}
9155 ins_pipe( pipe_slow );
9156 %}
9157
9158 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
9159 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
9160 match(Set dst (URShiftVS dst shift));
9161 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
9162 ins_encode %{
9163 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
9164 %}
9165 ins_pipe( pipe_slow );
9166 %}
9167
9168 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
9169 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
9170 match(Set dst (URShiftVS src shift));
9171 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
9172 ins_encode %{
9173 int vector_len = 0;
9174 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9175 %}
9176 ins_pipe( pipe_slow );
9177 %}
9178
9179 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
9180 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
9181 match(Set dst (URShiftVS src shift));
9182 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
9183 ins_encode %{
9184 int vector_len = 0;
9185 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9186 %}
9187 ins_pipe( pipe_slow );
9188 %}
9189
9190 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
9191 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
9192 match(Set dst (URShiftVS dst shift));
9193 effect(TEMP src);
9194 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
9195 ins_encode %{
9196 int vector_len = 0;
9197 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9198 %}
9199 ins_pipe( pipe_slow );
9200 %}
9201
9202 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
9203 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
9204 match(Set dst (URShiftVS src shift));
9205 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
9206 ins_encode %{
9207 int vector_len = 0;
9208 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9209 %}
9210 ins_pipe( pipe_slow );
9211 %}
9212
9213 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
9214 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
9215 match(Set dst (URShiftVS src shift));
9216 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
9217 ins_encode %{
9218 int vector_len = 0;
9219 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9220 %}
9221 ins_pipe( pipe_slow );
9222 %}
9223
9224 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
9225 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
9226 match(Set dst (URShiftVS dst shift));
9227 effect(TEMP src);
9228 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
9229 ins_encode %{
9230 int vector_len = 0;
9231 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9232 %}
9233 ins_pipe( pipe_slow );
9234 %}
9235
9236 instruct vsrl4S(vecD dst, vecS shift) %{
9237 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9238 match(Set dst (URShiftVS dst shift));
9239 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
9240 ins_encode %{
9241 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
9242 %}
9243 ins_pipe( pipe_slow );
9244 %}
9245
9246 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
9247 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9248 match(Set dst (URShiftVS dst shift));
9249 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
9250 ins_encode %{
9251 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
9252 %}
9253 ins_pipe( pipe_slow );
9254 %}
9255
9256 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
9257 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
9258 match(Set dst (URShiftVS src shift));
9259 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
9260 ins_encode %{
9261 int vector_len = 0;
9262 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9263 %}
9264 ins_pipe( pipe_slow );
9265 %}
9266
9267 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
9268 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
9269 match(Set dst (URShiftVS src shift));
9270 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
9271 ins_encode %{
9272 int vector_len = 0;
9273 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9274 %}
9275 ins_pipe( pipe_slow );
9276 %}
9277
9278 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
9279 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
9280 match(Set dst (URShiftVS dst shift));
9281 effect(TEMP src);
9282 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
9283 ins_encode %{
9284 int vector_len = 0;
9285 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9286 %}
9287 ins_pipe( pipe_slow );
9288 %}
9289
9290 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
9291 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
9292 match(Set dst (URShiftVS src shift));
9293 format %{ "vpsrlw $dst,$src,$shift\t!
logical right shift packed4S" %} 9294 ins_encode %{ 9295 int vector_len = 0; 9296 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9297 %} 9298 ins_pipe( pipe_slow ); 9299 %} 9300 9301 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9302 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9303 match(Set dst (URShiftVS src shift)); 9304 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9305 ins_encode %{ 9306 int vector_len = 0; 9307 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9308 %} 9309 ins_pipe( pipe_slow ); 9310 %} 9311 9312 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9313 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9314 match(Set dst (URShiftVS dst shift)); 9315 effect(TEMP src); 9316 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9317 ins_encode %{ 9318 int vector_len = 0; 9319 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9320 %} 9321 ins_pipe( pipe_slow ); 9322 %} 9323 9324 instruct vsrl8S(vecX dst, vecS shift) %{ 9325 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9326 match(Set dst (URShiftVS dst shift)); 9327 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9328 ins_encode %{ 9329 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9330 %} 9331 ins_pipe( pipe_slow ); 9332 %} 9333 9334 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 9335 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9336 match(Set dst (URShiftVS dst shift)); 9337 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9338 ins_encode %{ 9339 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9340 %} 9341 ins_pipe( pipe_slow ); 9342 %} 9343 9344 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9345 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9346 match(Set dst (URShiftVS src shift)); 9347 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9348 ins_encode %{ 9349 int vector_len = 0; 9350 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9351 %} 9352 ins_pipe( pipe_slow ); 9353 %} 9354 9355 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9356 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9357 match(Set dst (URShiftVS src shift)); 9358 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9359 ins_encode %{ 9360 int vector_len = 0; 9361 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9362 %} 9363 ins_pipe( pipe_slow ); 9364 %} 9365 9366 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9367 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9368 match(Set dst (URShiftVS dst shift)); 9369 effect(TEMP src); 9370 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9371 ins_encode %{ 9372 int vector_len = 0; 9373 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9374 %} 9375 ins_pipe( pipe_slow ); 9376 %} 9377 9378 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9379 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9380 match(Set dst (URShiftVS src shift)); 9381 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 9382 ins_encode %{ 9383 int vector_len = 0; 9384 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9385 %} 9386 ins_pipe( pipe_slow ); 9387 %} 9388 9389 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9390 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9391 match(Set dst (URShiftVS src shift)); 9392 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9393 ins_encode %{ 9394 int vector_len = 0; 9395 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9396 %} 9397 ins_pipe( pipe_slow ); 9398 %} 9399 9400 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9401 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9402 match(Set dst (URShiftVS dst shift)); 9403 effect(TEMP src); 9404 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9405 ins_encode %{ 9406 int vector_len = 0; 9407 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9408 %} 9409 ins_pipe( pipe_slow ); 9410 %} 9411 9412 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9413 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9414 match(Set dst (URShiftVS src shift)); 9415 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9416 ins_encode %{ 9417 int vector_len = 1; 9418 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9419 %} 9420 ins_pipe( pipe_slow ); 9421 %} 9422 9423 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9424 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9425 match(Set dst (URShiftVS src shift)); 9426 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9427 ins_encode %{ 9428 int vector_len = 1; 9429 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9430 %} 9431 ins_pipe( pipe_slow ); 9432 %} 9433 9434 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9435 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9436 match(Set dst (URShiftVS dst shift)); 9437 effect(TEMP src); 9438 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9439 ins_encode %{ 9440 int vector_len = 1; 9441 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9442 %} 9443 ins_pipe( pipe_slow ); 9444 %} 9445 9446 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9447 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9448 match(Set dst (URShiftVS src shift)); 9449 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9450 ins_encode %{ 9451 int vector_len = 1; 9452 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9453 %} 9454 ins_pipe( pipe_slow ); 9455 %} 9456 9457 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9458 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9459 match(Set dst (URShiftVS src shift)); 9460 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 9461 ins_encode %{ 9462 int vector_len = 1; 9463 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9464 %} 9465 ins_pipe( pipe_slow ); 9466 %} 9467 9468 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9469 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9470 match(Set dst (URShiftVS dst shift)); 9471 effect(TEMP src); 9472 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9473 ins_encode %{ 9474 int vector_len = 1; 9475 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9476 %} 9477 ins_pipe( pipe_slow ); 9478 %} 9479 9480 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9481 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9482 match(Set dst (URShiftVS src shift)); 9483 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9484 ins_encode %{ 9485 int vector_len = 2; 9486 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9487 %} 9488 ins_pipe( pipe_slow ); 9489 %} 9490 9491 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9492 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9493 match(Set dst (URShiftVS src shift)); 9494 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9495 ins_encode %{ 9496 int vector_len = 2; 9497 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9498 %} 9499 ins_pipe( pipe_slow ); 9500 %} 9501 9502 // Integers vector logical right shift 9503 instruct vsrl2I(vecD dst, vecS shift) %{ 9504 predicate(n->as_Vector()->length() == 2); 9505 match(Set dst (URShiftVI dst shift)); 9506 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9507 ins_encode %{ 9508 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9509 %} 9510 ins_pipe( pipe_slow ); 9511 %} 9512 9513 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 9514 predicate(n->as_Vector()->length() == 2); 9515 match(Set dst (URShiftVI dst shift)); 9516 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9517 ins_encode %{ 9518 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9519 %} 9520 ins_pipe( pipe_slow ); 9521 %} 9522 9523 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 9524 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9525 match(Set dst (URShiftVI src shift)); 9526 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9527 ins_encode %{ 9528 int vector_len = 0; 9529 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9530 %} 9531 ins_pipe( pipe_slow ); 9532 %} 9533 9534 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9535 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9536 match(Set dst (URShiftVI src shift)); 9537 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9538 ins_encode %{ 9539 int vector_len = 0; 9540 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9541 %} 9542 ins_pipe( pipe_slow ); 9543 %} 9544 9545 instruct vsrl4I(vecX dst, vecS shift) %{ 9546 predicate(n->as_Vector()->length() == 4); 9547 match(Set dst (URShiftVI dst shift)); 9548 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 9549 ins_encode %{ 9550 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9551 %} 9552 ins_pipe( pipe_slow ); 9553 %} 9554 9555 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 9556 predicate(n->as_Vector()->length() == 4); 9557 match(Set dst (URShiftVI dst shift)); 9558 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9559 ins_encode %{ 9560 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9561 %} 9562 ins_pipe( pipe_slow ); 9563 %} 9564 9565 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 9566 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9567 match(Set dst (URShiftVI src shift)); 9568 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9569 ins_encode %{ 9570 int vector_len = 0; 9571 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9572 %} 9573 ins_pipe( pipe_slow ); 9574 %} 9575 9576 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9577 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9578 match(Set dst (URShiftVI src shift)); 9579 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9580 ins_encode %{ 9581 int vector_len = 0; 9582 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9583 %} 9584 ins_pipe( pipe_slow ); 9585 %} 9586 9587 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 9588 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9589 match(Set dst (URShiftVI src shift)); 9590 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9591 ins_encode %{ 9592 int vector_len = 1; 9593 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9594 %} 9595 ins_pipe( pipe_slow ); 9596 %} 9597 9598 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9599 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9600 match(Set dst (URShiftVI src shift)); 9601 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9602 ins_encode %{ 9603 int vector_len = 1; 9604 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9605 %} 9606 ins_pipe( pipe_slow ); 9607 %} 9608 9609 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9610 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9611 match(Set dst (URShiftVI src shift)); 9612 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9613 ins_encode %{ 9614 int vector_len = 2; 9615 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9616 %} 9617 ins_pipe( pipe_slow ); 9618 %} 9619 9620 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9621 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9622 match(Set dst (URShiftVI src shift)); 9623 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9624 ins_encode %{ 9625 int vector_len = 2; 9626 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9627 %} 9628 ins_pipe( pipe_slow ); 9629 %} 9630 9631 // Longs vector logical right shift 9632 instruct vsrl2L(vecX dst, vecS shift) %{ 9633 predicate(n->as_Vector()->length() == 2); 9634 match(Set dst (URShiftVL dst shift)); 9635 format %{ "psrlq $dst,$shift\t! 
logical right shift packed2L" %} 9636 ins_encode %{ 9637 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 9638 %} 9639 ins_pipe( pipe_slow ); 9640 %} 9641 9642 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 9643 predicate(n->as_Vector()->length() == 2); 9644 match(Set dst (URShiftVL dst shift)); 9645 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9646 ins_encode %{ 9647 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 9648 %} 9649 ins_pipe( pipe_slow ); 9650 %} 9651 9652 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 9653 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9654 match(Set dst (URShiftVL src shift)); 9655 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9656 ins_encode %{ 9657 int vector_len = 0; 9658 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9659 %} 9660 ins_pipe( pipe_slow ); 9661 %} 9662 9663 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9664 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9665 match(Set dst (URShiftVL src shift)); 9666 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9667 ins_encode %{ 9668 int vector_len = 0; 9669 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9670 %} 9671 ins_pipe( pipe_slow ); 9672 %} 9673 9674 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 9675 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9676 match(Set dst (URShiftVL src shift)); 9677 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9678 ins_encode %{ 9679 int vector_len = 1; 9680 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9681 %} 9682 ins_pipe( pipe_slow ); 9683 %} 9684 9685 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9686 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9687 match(Set dst (URShiftVL src shift)); 9688 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9689 ins_encode %{ 9690 int vector_len = 1; 9691 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9692 %} 9693 ins_pipe( pipe_slow ); 9694 %} 9695 9696 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9697 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9698 match(Set dst (URShiftVL src shift)); 9699 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9700 ins_encode %{ 9701 int vector_len = 2; 9702 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9703 %} 9704 ins_pipe( pipe_slow ); 9705 %} 9706 9707 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9708 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9709 match(Set dst (URShiftVL src shift)); 9710 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9711 ins_encode %{ 9712 int vector_len = 2; 9713 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9714 %} 9715 ins_pipe( pipe_slow ); 9716 %} 9717 9718 // ------------------- ArithmeticRightShift ----------------------------------- 9719 9720 // Shorts/Chars vector arithmetic right shift 9721 instruct vsra2S(vecS dst, vecS shift) %{ 9722 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9723 match(Set dst (RShiftVS dst shift)); 9724 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %}
9725 ins_encode %{
9726 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
9727 %}
9728 ins_pipe( pipe_slow );
9729 %}
9730
9731 instruct vsra2S_imm(vecS dst, immI8 shift) %{
9732 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
9733 match(Set dst (RShiftVS dst shift));
9734 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
9735 ins_encode %{
9736 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
9737 %}
9738 ins_pipe( pipe_slow );
9739 %}
9740
9741 instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
9742 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
9743 match(Set dst (RShiftVS src shift));
9744 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9745 ins_encode %{
9746 int vector_len = 0;
9747 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9748 %}
9749 ins_pipe( pipe_slow );
9750 %}
9751
9752 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
9753 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
9754 match(Set dst (RShiftVS src shift));
9755 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9756 ins_encode %{
9757 int vector_len = 0;
9758 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9759 %}
9760 ins_pipe( pipe_slow );
9761 %}
9762
9763 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
9764 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
9765 match(Set dst (RShiftVS dst shift));
9766 effect(TEMP src);
9767 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9768 ins_encode %{
9769 int vector_len = 0;
9770 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9771 %}
9772 ins_pipe( pipe_slow );
9773 %}
9774
9775 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
9776 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
9777 match(Set dst (RShiftVS src shift));
9778 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9779 ins_encode %{
9780 int vector_len = 0;
9781 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9782 %}
9783 ins_pipe( pipe_slow );
9784 %}
9785
9786 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
9787 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
9788 match(Set dst (RShiftVS src shift));
9789 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9790 ins_encode %{
9791 int vector_len = 0;
9792 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9793 %}
9794 ins_pipe( pipe_slow );
9795 %}
9796
9797 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
9798 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
9799 match(Set dst (RShiftVS dst shift));
9800 effect(TEMP src);
9801 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9802 ins_encode %{
9803 int vector_len = 0;
9804 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9805 %}
9806 ins_pipe( pipe_slow );
9807 %}
9808
9809 instruct vsra4S(vecD dst, vecS shift) %{
9810 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9811 match(Set dst (RShiftVS dst shift));
9812 format %{ "psraw $dst,$shift\t!
arithmetic right shift packed4S" %} 9813 ins_encode %{ 9814 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9815 %} 9816 ins_pipe( pipe_slow ); 9817 %} 9818 9819 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 9820 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9821 match(Set dst (RShiftVS dst shift)); 9822 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9823 ins_encode %{ 9824 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9825 %} 9826 ins_pipe( pipe_slow ); 9827 %} 9828 9829 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9830 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9831 match(Set dst (RShiftVS src shift)); 9832 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9833 ins_encode %{ 9834 int vector_len = 0; 9835 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9836 %} 9837 ins_pipe( pipe_slow ); 9838 %} 9839 9840 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9841 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9842 match(Set dst (RShiftVS src shift)); 9843 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9844 ins_encode %{ 9845 int vector_len = 0; 9846 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9847 %} 9848 ins_pipe( pipe_slow ); 9849 %} 9850 9851 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9852 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9853 match(Set dst (RShiftVS dst shift)); 9854 effect(TEMP src); 9855 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9856 ins_encode %{ 9857 int vector_len = 0; 9858 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9859 %} 9860 ins_pipe( pipe_slow ); 9861 %} 9862 9863 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9864 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9865 match(Set dst (RShiftVS src shift)); 9866 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9867 ins_encode %{ 9868 int vector_len = 0; 9869 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9870 %} 9871 ins_pipe( pipe_slow ); 9872 %} 9873 9874 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9875 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9876 match(Set dst (RShiftVS src shift)); 9877 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9878 ins_encode %{ 9879 int vector_len = 0; 9880 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9881 %} 9882 ins_pipe( pipe_slow ); 9883 %} 9884 9885 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9886 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9887 match(Set dst (RShiftVS dst shift)); 9888 effect(TEMP src); 9889 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9890 ins_encode %{ 9891 int vector_len = 0; 9892 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9893 %} 9894 ins_pipe( pipe_slow ); 9895 %} 9896 9897 instruct vsra8S(vecX dst, vecS shift) %{ 9898 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9899 match(Set dst (RShiftVS dst shift)); 9900 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %} 9901 ins_encode %{ 9902 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9903 %} 9904 ins_pipe( pipe_slow ); 9905 %} 9906 9907 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 9908 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9909 match(Set dst (RShiftVS dst shift)); 9910 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 9911 ins_encode %{ 9912 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9913 %} 9914 ins_pipe( pipe_slow ); 9915 %} 9916 9917 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9918 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9919 match(Set dst (RShiftVS src shift)); 9920 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9921 ins_encode %{ 9922 int vector_len = 0; 9923 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9924 %} 9925 ins_pipe( pipe_slow ); 9926 %} 9927 9928 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9929 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9930 match(Set dst (RShiftVS src shift)); 9931 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9932 ins_encode %{ 9933 int vector_len = 0; 9934 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9935 %} 9936 ins_pipe( pipe_slow ); 9937 %} 9938 9939 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9940 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9941 match(Set dst (RShiftVS dst shift)); 9942 effect(TEMP src); 9943 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9944 ins_encode %{ 9945 int vector_len = 0; 9946 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9947 %} 9948 ins_pipe( pipe_slow ); 9949 %} 9950 9951 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9952 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9953 match(Set dst (RShiftVS src shift)); 9954 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9955 ins_encode %{ 9956 int vector_len = 0; 9957 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9958 %} 9959 ins_pipe( pipe_slow ); 9960 %} 9961 9962 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9963 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9964 match(Set dst (RShiftVS src shift)); 9965 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9966 ins_encode %{ 9967 int vector_len = 0; 9968 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9969 %} 9970 ins_pipe( pipe_slow ); 9971 %} 9972 9973 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9974 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9975 match(Set dst (RShiftVS dst shift)); 9976 effect(TEMP src); 9977 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9978 ins_encode %{ 9979 int vector_len = 0; 9980 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9981 %} 9982 ins_pipe( pipe_slow ); 9983 %} 9984 9985 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9986 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9987 match(Set dst (RShiftVS src shift)); 9988 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %}
9989 ins_encode %{
9990 int vector_len = 1;
9991 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9992 %}
9993 ins_pipe( pipe_slow );
9994 %}
9995
9996 instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
9997 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
9998 match(Set dst (RShiftVS src shift));
9999 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10000 ins_encode %{
10001 int vector_len = 1;
10002 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10003 %}
10004 ins_pipe( pipe_slow );
10005 %}
10006
10007 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
10008 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
10009 match(Set dst (RShiftVS dst shift));
10010 effect(TEMP src);
10011 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10012 ins_encode %{
10013 int vector_len = 1;
10014 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10015 %}
10016 ins_pipe( pipe_slow );
10017 %}
10018
10019 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
10020 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
10021 match(Set dst (RShiftVS src shift));
10022 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10023 ins_encode %{
10024 int vector_len = 1;
10025 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10026 %}
10027 ins_pipe( pipe_slow );
10028 %}
10029
10030 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
10031 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
10032 match(Set dst (RShiftVS src shift));
10033 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10034 ins_encode %{
10035 int vector_len = 1;
10036 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10037 %}
10038 ins_pipe( pipe_slow );
10039 %}
10040
10041 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
10042 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
10043 match(Set dst (RShiftVS dst shift));
10044 effect(TEMP src);
10045 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10046 ins_encode %{
10047 int vector_len = 1;
10048 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10049 %}
10050 ins_pipe( pipe_slow );
10051 %}
10052
10053 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
10054 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
10055 match(Set dst (RShiftVS src shift));
10056 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
10057 ins_encode %{
10058 int vector_len = 2;
10059 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10060 %}
10061 ins_pipe( pipe_slow );
10062 %}
10063
10064 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
10065 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
10066 match(Set dst (RShiftVS src shift));
10067 format %{ "vpsraw $dst,$src,$shift\t!
arithmetic right shift packed32S" %} 10068 ins_encode %{ 10069 int vector_len = 2; 10070 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10071 %} 10072 ins_pipe( pipe_slow ); 10073 %} 10074 10075 // Integers vector arithmetic right shift 10076 instruct vsra2I(vecD dst, vecS shift) %{ 10077 predicate(n->as_Vector()->length() == 2); 10078 match(Set dst (RShiftVI dst shift)); 10079 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10080 ins_encode %{ 10081 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10082 %} 10083 ins_pipe( pipe_slow ); 10084 %} 10085 10086 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 10087 predicate(n->as_Vector()->length() == 2); 10088 match(Set dst (RShiftVI dst shift)); 10089 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10090 ins_encode %{ 10091 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10092 %} 10093 ins_pipe( pipe_slow ); 10094 %} 10095 10096 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 10097 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10098 match(Set dst (RShiftVI src shift)); 10099 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10100 ins_encode %{ 10101 int vector_len = 0; 10102 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10103 %} 10104 ins_pipe( pipe_slow ); 10105 %} 10106 10107 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 10108 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10109 match(Set dst (RShiftVI src shift)); 10110 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10111 ins_encode %{ 10112 int vector_len = 0; 10113 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10114 %} 10115 ins_pipe( pipe_slow ); 10116 %} 10117 10118 instruct vsra4I(vecX dst, vecS shift) %{ 10119 predicate(n->as_Vector()->length() == 4); 10120 match(Set dst (RShiftVI dst shift)); 10121 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10122 ins_encode %{ 10123 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10124 %} 10125 ins_pipe( pipe_slow ); 10126 %} 10127 10128 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 10129 predicate(n->as_Vector()->length() == 4); 10130 match(Set dst (RShiftVI dst shift)); 10131 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10132 ins_encode %{ 10133 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10134 %} 10135 ins_pipe( pipe_slow ); 10136 %} 10137 10138 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 10139 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10140 match(Set dst (RShiftVI src shift)); 10141 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10142 ins_encode %{ 10143 int vector_len = 0; 10144 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10145 %} 10146 ins_pipe( pipe_slow ); 10147 %} 10148 10149 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 10150 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10151 match(Set dst (RShiftVI src shift)); 10152 format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed4I" %} 10153 ins_encode %{ 10154 int vector_len = 0; 10155 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10156 %} 10157 ins_pipe( pipe_slow ); 10158 %} 10159 10160 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 10161 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10162 match(Set dst (RShiftVI src shift)); 10163 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10164 ins_encode %{ 10165 int vector_len = 1; 10166 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10167 %} 10168 ins_pipe( pipe_slow ); 10169 %} 10170 10171 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 10172 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10173 match(Set dst (RShiftVI src shift)); 10174 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10175 ins_encode %{ 10176 int vector_len = 1; 10177 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10178 %} 10179 ins_pipe( pipe_slow ); 10180 %} 10181 10182 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ 10183 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10184 match(Set dst (RShiftVI src shift)); 10185 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10186 ins_encode %{ 10187 int vector_len = 2; 10188 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10189 %} 10190 ins_pipe( pipe_slow ); 10191 %} 10192 10193 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10194 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10195 match(Set dst (RShiftVI src shift)); 10196 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10197 ins_encode %{ 10198 int vector_len = 2; 10199 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10200 %} 10201 ins_pipe( pipe_slow ); 10202 %} 10203 10204 // There are no longs vector arithmetic right shift instructions. 10205 10206 10207 // --------------------------------- AND -------------------------------------- 10208 10209 instruct vand4B(vecS dst, vecS src) %{ 10210 predicate(n->as_Vector()->length_in_bytes() == 4); 10211 match(Set dst (AndV dst src)); 10212 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 10213 ins_encode %{ 10214 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10215 %} 10216 ins_pipe( pipe_slow ); 10217 %} 10218 10219 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 10220 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10221 match(Set dst (AndV src1 src2)); 10222 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 10223 ins_encode %{ 10224 int vector_len = 0; 10225 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10226 %} 10227 ins_pipe( pipe_slow ); 10228 %} 10229 10230 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 10231 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10232 match(Set dst (AndV src (LoadVector mem))); 10233 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 10234 ins_encode %{ 10235 int vector_len = 0; 10236 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10237 %} 10238 ins_pipe( pipe_slow ); 10239 %} 10240 10241 instruct vand8B(vecD dst, vecD src) %{ 10242 predicate(n->as_Vector()->length_in_bytes() == 8); 10243 match(Set dst (AndV dst src)); 10244 format %{ "pand $dst,$src\t! 
and vectors (8 bytes)" %} 10245 ins_encode %{ 10246 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10247 %} 10248 ins_pipe( pipe_slow ); 10249 %} 10250 10251 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 10252 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10253 match(Set dst (AndV src1 src2)); 10254 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 10255 ins_encode %{ 10256 int vector_len = 0; 10257 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10258 %} 10259 ins_pipe( pipe_slow ); 10260 %} 10261 10262 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 10263 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10264 match(Set dst (AndV src (LoadVector mem))); 10265 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 10266 ins_encode %{ 10267 int vector_len = 0; 10268 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10269 %} 10270 ins_pipe( pipe_slow ); 10271 %} 10272 10273 instruct vand16B(vecX dst, vecX src) %{ 10274 predicate(n->as_Vector()->length_in_bytes() == 16); 10275 match(Set dst (AndV dst src)); 10276 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 10277 ins_encode %{ 10278 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10279 %} 10280 ins_pipe( pipe_slow ); 10281 %} 10282 10283 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 10284 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10285 match(Set dst (AndV src1 src2)); 10286 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 10287 ins_encode %{ 10288 int vector_len = 0; 10289 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10290 %} 10291 ins_pipe( pipe_slow ); 10292 %} 10293 10294 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 10295 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10296 match(Set dst (AndV src (LoadVector mem))); 10297 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 10298 ins_encode %{ 10299 int vector_len = 0; 10300 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10301 %} 10302 ins_pipe( pipe_slow ); 10303 %} 10304 10305 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 10306 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10307 match(Set dst (AndV src1 src2)); 10308 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 10309 ins_encode %{ 10310 int vector_len = 1; 10311 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10312 %} 10313 ins_pipe( pipe_slow ); 10314 %} 10315 10316 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 10317 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10318 match(Set dst (AndV src (LoadVector mem))); 10319 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 10320 ins_encode %{ 10321 int vector_len = 1; 10322 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10323 %} 10324 ins_pipe( pipe_slow ); 10325 %} 10326 10327 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10328 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10329 match(Set dst (AndV src1 src2)); 10330 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (64 bytes)" %}
10331 ins_encode %{
10332 int vector_len = 2;
10333 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
10334 %}
10335 ins_pipe( pipe_slow );
10336 %}
10337
10338 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
10339 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
10340 match(Set dst (AndV src (LoadVector mem)));
10341 format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
10342 ins_encode %{
10343 int vector_len = 2;
10344 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
10345 %}
10346 ins_pipe( pipe_slow );
10347 %}
10348
10349 // --------------------------------- OR ---------------------------------------
10350
10351 instruct vor4B(vecS dst, vecS src) %{
10352 predicate(n->as_Vector()->length_in_bytes() == 4);
10353 match(Set dst (OrV dst src));
10354 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
10355 ins_encode %{
10356 __ por($dst$$XMMRegister, $src$$XMMRegister);
10357 %}
10358 ins_pipe( pipe_slow );
10359 %}
10360
10361 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
10362 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
10363 match(Set dst (OrV src1 src2));
10364 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
10365 ins_encode %{
10366 int vector_len = 0;
10367 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
10368 %}
10369 ins_pipe( pipe_slow );
10370 %}
10371
10372 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
10373 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
10374 match(Set dst (OrV src (LoadVector mem)));
10375 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
10376 ins_encode %{
10377 int vector_len = 0;
10378 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
10379 %}
10380 ins_pipe( pipe_slow );
10381 %}
10382
10383 instruct vor8B(vecD dst, vecD src) %{
10384 predicate(n->as_Vector()->length_in_bytes() == 8);
10385 match(Set dst (OrV dst src));
10386 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
10387 ins_encode %{
10388 __ por($dst$$XMMRegister, $src$$XMMRegister);
10389 %}
10390 ins_pipe( pipe_slow );
10391 %}
10392
10393 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
10394 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
10395 match(Set dst (OrV src1 src2));
10396 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
10397 ins_encode %{
10398 int vector_len = 0;
10399 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
10400 %}
10401 ins_pipe( pipe_slow );
10402 %}
10403
10404 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
10405 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
10406 match(Set dst (OrV src (LoadVector mem)));
10407 format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
10408 ins_encode %{
10409 int vector_len = 0;
10410 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
10411 %}
10412 ins_pipe( pipe_slow );
10413 %}
10414
10415 instruct vor16B(vecX dst, vecX src) %{
10416 predicate(n->as_Vector()->length_in_bytes() == 16);
10417 match(Set dst (OrV dst src));
10418 format %{ "por $dst,$src\t!
or vectors (16 bytes)" %} 10419 ins_encode %{ 10420 __ por($dst$$XMMRegister, $src$$XMMRegister); 10421 %} 10422 ins_pipe( pipe_slow ); 10423 %} 10424 10425 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10426 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10427 match(Set dst (OrV src1 src2)); 10428 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 10429 ins_encode %{ 10430 int vector_len = 0; 10431 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10432 %} 10433 ins_pipe( pipe_slow ); 10434 %} 10435 10436 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 10437 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10438 match(Set dst (OrV src (LoadVector mem))); 10439 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 10440 ins_encode %{ 10441 int vector_len = 0; 10442 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10443 %} 10444 ins_pipe( pipe_slow ); 10445 %} 10446 10447 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 10448 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10449 match(Set dst (OrV src1 src2)); 10450 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 10451 ins_encode %{ 10452 int vector_len = 1; 10453 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10454 %} 10455 ins_pipe( pipe_slow ); 10456 %} 10457 10458 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 10459 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10460 match(Set dst (OrV src (LoadVector mem))); 10461 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 10462 ins_encode %{ 10463 int vector_len = 1; 10464 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10465 %} 10466 ins_pipe( pipe_slow ); 10467 %} 10468 10469 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10470 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10471 match(Set dst (OrV src1 src2)); 10472 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 10473 ins_encode %{ 10474 int vector_len = 2; 10475 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10476 %} 10477 ins_pipe( pipe_slow ); 10478 %} 10479 10480 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 10481 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10482 match(Set dst (OrV src (LoadVector mem))); 10483 format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %} 10484 ins_encode %{ 10485 int vector_len = 2; 10486 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10487 %} 10488 ins_pipe( pipe_slow ); 10489 %} 10490 10491 // --------------------------------- XOR -------------------------------------- 10492 10493 instruct vxor4B(vecS dst, vecS src) %{ 10494 predicate(n->as_Vector()->length_in_bytes() == 4); 10495 match(Set dst (XorV dst src)); 10496 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 10497 ins_encode %{ 10498 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10499 %} 10500 ins_pipe( pipe_slow ); 10501 %} 10502 10503 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 10504 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10505 match(Set dst (XorV src1 src2)); 10506 format %{ "vpxor $dst,$src1,$src2\t! 
xor vectors (4 bytes)" %} 10507 ins_encode %{ 10508 int vector_len = 0; 10509 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10510 %} 10511 ins_pipe( pipe_slow ); 10512 %} 10513 10514 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 10515 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10516 match(Set dst (XorV src (LoadVector mem))); 10517 format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} 10518 ins_encode %{ 10519 int vector_len = 0; 10520 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10521 %} 10522 ins_pipe( pipe_slow ); 10523 %} 10524 10525 instruct vxor8B(vecD dst, vecD src) %{ 10526 predicate(n->as_Vector()->length_in_bytes() == 8); 10527 match(Set dst (XorV dst src)); 10528 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 10529 ins_encode %{ 10530 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10531 %} 10532 ins_pipe( pipe_slow ); 10533 %} 10534 10535 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 10536 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10537 match(Set dst (XorV src1 src2)); 10538 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 10539 ins_encode %{ 10540 int vector_len = 0; 10541 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10542 %} 10543 ins_pipe( pipe_slow ); 10544 %} 10545 10546 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 10547 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10548 match(Set dst (XorV src (LoadVector mem))); 10549 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 10550 ins_encode %{ 10551 int vector_len = 0; 10552 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10553 %} 10554 ins_pipe( pipe_slow ); 10555 %} 10556 10557 instruct vxor16B(vecX dst, vecX src) %{ 10558 predicate(n->as_Vector()->length_in_bytes() == 16); 10559 match(Set dst (XorV dst src)); 10560 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 10561 ins_encode %{ 10562 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10563 %} 10564 ins_pipe( pipe_slow ); 10565 %} 10566 10567 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10568 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10569 match(Set dst (XorV src1 src2)); 10570 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 10571 ins_encode %{ 10572 int vector_len = 0; 10573 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10574 %} 10575 ins_pipe( pipe_slow ); 10576 %} 10577 10578 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 10579 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10580 match(Set dst (XorV src (LoadVector mem))); 10581 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 10582 ins_encode %{ 10583 int vector_len = 0; 10584 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10585 %} 10586 ins_pipe( pipe_slow ); 10587 %} 10588 10589 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 10590 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10591 match(Set dst (XorV src1 src2)); 10592 format %{ "vpxor $dst,$src1,$src2\t! 
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
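// All FMA instructs below share one shape: the ideal subtree
// (FmaVD c (Binary a b)) (FmaVF for single precision) computes c = a * b + c,
// and "Set c (...)" makes $c both an input and the result, so the accumulator
// stays in one register. This matches the destructive
// dst = src1 * src2 + dst form of the FMA3 hardware instructions; the
// vfmad/vfmaf calls in the encodings are assumed here to be MacroAssembler
// helpers around the packed vfmadd231pd/vfmadd231ps encodings.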
// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
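// For orientation, this is the kind of Java source the packed-FMA instructs
// in this section are meant to back. It is a hypothetical example, not taken
// from this file: it assumes C2's SuperWord pass vectorizes the intrinsified
// Math.fma call when UseFMA is enabled and the loop is simple enough.
//
//   static void fmaLoop(double[] a, double[] b, double[] c) {
//     for (int i = 0; i < a.length; i++) {
//       c[i] = Math.fma(a[i], b[i], c[i]);  // FmaD, then FmaVD after SuperWord
//     }
//   }
//
// With 512-bit vectors such a loop would map to the vfma8D_reg/vfma8D_mem
// forms above (8 doubles per operation); with 256-bit vectors, to vfma4D_*.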
// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
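// Note that the FMA predicates test only UseFMA and the vector length, with
// no explicit UseAVX check. The assumption made here is that the VM enables
// UseFMA only when the CPU actually reports FMA support, and that the vecZ
// operand class used by the 8D/16F forms is itself restricted to
// EVEX-capable targets, so an extra AVX-level test would be redundant.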