//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
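//
// Illustrative reading of the definitions below (editorial commentary only): in
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// both save types are SOC (save-on-call), the ideal type Op_RegF means the
// slice spills as a float-sized value, 0 is the hardware encoding placed in
// opcodes, and xmm0->as_VMReg() binds the slice to the concrete VM register.
// The ->next(n) forms name the nth 32-bit word of the same 512-bit register,
// so e.g. {XMM0, XMM0b} together back a Double.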
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);
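// Note on the register classes that follow (explanatory sketch, not new
// definitions): most classes below come in a _legacy/_evex pair, and a
// reg_class_dynamic of the form
//   reg_class_dynamic NAME(NAME_evex, NAME_legacy, %{ VM_Version::supports_evex() %});
// selects the EVEX variant (which additionally includes XMM16-XMM31) only
// when the CPU reports EVEX support at VM startup.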

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for EVEX 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for EVEX 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for EVEX 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 256bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for EVEX 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
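// Editorial note: the 512-bit class below has no _legacy/_evex split. A
// presumed reading is that 512-bit vectors only exist on EVEX-capable
// (AVX-512) hardware, so a single class suffices.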
// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---< Used for optimization in Compile::shorten_branches >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer& cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The deopt handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.
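  //
  // A minimal sketch of the trick used below (illustrative, not the
  // emitted bytes): "call next" pushes the address of the instruction
  // after the call, i.e. "next"; the stack slot is then adjusted by the
  // number of bytes already emitted so that it holds "the_pc" instead:
  //
  //   the_pc:  call next                         ; pushes address of "next"
  //   next:    sub qword [rsp], (next - the_pc)  ; slot now holds "the_pc"
  //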
1181 1182 // push address of "next" 1183 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1184 __ bind(next); 1185 // adjust it so it matches "the_pc" 1186 __ subptr(Address(rsp, 0), __ offset() - offset); 1187 #else 1188 InternalAddress here(__ pc()); 1189 __ pushptr(here.addr()); 1190 #endif 1191 1192 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1193 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1194 __ end_a_stub(); 1195 return offset; 1196 } 1197 1198 1199 //============================================================================= 1200 1201 // Float masks come from different places depending on platform. 1202 #ifdef _LP64 1203 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1204 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1205 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1206 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1207 #else 1208 static address float_signmask() { return (address)float_signmask_pool; } 1209 static address float_signflip() { return (address)float_signflip_pool; } 1210 static address double_signmask() { return (address)double_signmask_pool; } 1211 static address double_signflip() { return (address)double_signflip_pool; } 1212 #endif 1213 1214 1215 const bool Matcher::match_rule_supported(int opcode) { 1216 if (!has_match_rule(opcode)) 1217 return false; 1218 1219 bool ret_value = true; 1220 switch (opcode) { 1221 case Op_PopCountI: 1222 case Op_PopCountL: 1223 if (!UsePopCountInstruction) 1224 ret_value = false; 1225 break; 1226 case Op_PopCountVI: 1227 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq()) 1228 ret_value = false; 1229 break; 1230 case Op_MulVI: 1231 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1232 ret_value = false; 1233 break; 1234 case Op_MulVL: 1235 case Op_MulReductionVL: 1236 if (VM_Version::supports_avx512dq() == false) 1237 ret_value = false; 1238 break; 1239 case Op_AddReductionVL: 1240 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1241 ret_value = false; 1242 break; 1243 case Op_AddReductionVI: 1244 if (UseSSE < 3) // requires at least SSE3 1245 ret_value = false; 1246 break; 1247 case Op_MulReductionVI: 1248 if (UseSSE < 4) // requires at least SSE4 1249 ret_value = false; 1250 break; 1251 case Op_AddReductionVF: 1252 case Op_AddReductionVD: 1253 case Op_MulReductionVF: 1254 case Op_MulReductionVD: 1255 if (UseSSE < 1) // requires at least SSE 1256 ret_value = false; 1257 break; 1258 case Op_SqrtVD: 1259 case Op_SqrtVF: 1260 if (UseAVX < 1) // enabled for AVX only 1261 ret_value = false; 1262 break; 1263 case Op_CompareAndSwapL: 1264 #ifdef _LP64 1265 case Op_CompareAndSwapP: 1266 #endif 1267 if (!VM_Version::supports_cx8()) 1268 ret_value = false; 1269 break; 1270 case Op_CMoveVF: 1271 case Op_CMoveVD: 1272 if (UseAVX < 1 || UseAVX > 2) 1273 ret_value = false; 1274 break; 1275 case Op_StrIndexOf: 1276 if (!UseSSE42Intrinsics) 1277 ret_value = false; 1278 break; 1279 case Op_StrIndexOfChar: 1280 if (!UseSSE42Intrinsics) 1281 ret_value = false; 1282 break; 1283 case Op_OnSpinWait: 1284 if (VM_Version::supports_on_spin_wait() == false) 1285 ret_value = false; 1286 break; 1287 } 1288 1289 return ret_value; // Per default match rules are supported. 
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_* vector nodes and other intrinsics, while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // Per default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX-512/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size, max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector loads/stores.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
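  //
  // A sketch of how this helper is driven (mirrors vec_mov_helper above;
  // st_off/reg below are placeholder values):
  //
  //   vec_spill_helper(&cbuf, false, true,  st_off, reg, Op_VecX, st); // emit a load
  //   vec_spill_helper(NULL,  false, false, st_off, reg, Op_VecX, st); // print the form only
  //   vec_spill_helper(NULL,  true,  true,  st_off, reg, Op_VecX, st); // 32-bit VM: size only
  //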
1560 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1561 if (cbuf) { 1562 MacroAssembler _masm(cbuf); 1563 int offset = __ offset(); 1564 if (is_load) { 1565 switch (ireg) { 1566 case Op_VecS: 1567 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1568 break; 1569 case Op_VecD: 1570 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1571 break; 1572 case Op_VecX: 1573 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1574 break; 1575 case Op_VecY: 1576 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1577 break; 1578 case Op_VecZ: 1579 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1580 break; 1581 default: 1582 ShouldNotReachHere(); 1583 } 1584 } else { // store 1585 switch (ireg) { 1586 case Op_VecS: 1587 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1588 break; 1589 case Op_VecD: 1590 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1591 break; 1592 case Op_VecX: 1593 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1594 break; 1595 case Op_VecY: 1596 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1597 break; 1598 case Op_VecZ: 1599 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1600 break; 1601 default: 1602 ShouldNotReachHere(); 1603 } 1604 } 1605 int size = __ offset() - offset; 1606 #ifdef ASSERT 1607 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1608 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
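    // offset_size above reflects the displacement encoding: no extra bytes
    // when stack_offset is 0, one byte for a disp8 (stack_offset < 0x80),
    // otherwise a 4-byte disp32 -- and with UseAVX > 2 presumably 6, i.e.
    // disp32 plus the two extra EVEX prefix bytes counted here as well.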
    assert(!do_size || size == (5 + offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size + offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
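  // A worked example (illustrative only): replicate8_imm(0x12, 1) gives
  //   val = 0x12, bit_width = 8
  //   -> 0x1212 -> 0x12121212 -> 0x1212121212121212
  // The initial mask strips sign extension, so replicate8_imm(-1, 2)
  // starts from 0xFFFF and yields 0xFFFFFFFFFFFFFFFF.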
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user-defined types which are used in
// instruction definitions.

// This one generically applies only for evex, so only one version
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without the next two lines; the error is:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
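    //
    // (For reference: these hex values appear to be the AVX compare-predicate
    // immediates used by vcmppd, e.g. 0x0 = EQ_OQ, 0x1 = LT_OS, 0xD = GE_OS,
    // 0xE = GT_OS; the two overflow entries below are placeholders only,
    // as their comments note.)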
1804 overflow (0x20, "o"); // not really supported by the instruction 1805 no_overflow (0x21, "no"); // not really supported by the instruction 1806 %} 1807 %} 1808 1809 1810 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1811 1812 // ============================================================================ 1813 1814 instruct ShouldNotReachHere() %{ 1815 match(Halt); 1816 format %{ "ud2\t# ShouldNotReachHere" %} 1817 ins_encode %{ 1818 __ ud2(); 1819 %} 1820 ins_pipe(pipe_slow); 1821 %} 1822 1823 // =================================EVEX special=============================== 1824 1825 instruct setMask(rRegI dst, rRegI src) %{ 1826 predicate(Matcher::has_predicated_vectors()); 1827 match(Set dst (SetVectMaskI src)); 1828 effect(TEMP dst); 1829 format %{ "setvectmask $dst, $src" %} 1830 ins_encode %{ 1831 __ setvectmask($dst$$Register, $src$$Register); 1832 %} 1833 ins_pipe(pipe_slow); 1834 %} 1835 1836 // ============================================================================ 1837 1838 instruct addF_reg(regF dst, regF src) %{ 1839 predicate((UseSSE>=1) && (UseAVX == 0)); 1840 match(Set dst (AddF dst src)); 1841 1842 format %{ "addss $dst, $src" %} 1843 ins_cost(150); 1844 ins_encode %{ 1845 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1846 %} 1847 ins_pipe(pipe_slow); 1848 %} 1849 1850 instruct addF_mem(regF dst, memory src) %{ 1851 predicate((UseSSE>=1) && (UseAVX == 0)); 1852 match(Set dst (AddF dst (LoadF src))); 1853 1854 format %{ "addss $dst, $src" %} 1855 ins_cost(150); 1856 ins_encode %{ 1857 __ addss($dst$$XMMRegister, $src$$Address); 1858 %} 1859 ins_pipe(pipe_slow); 1860 %} 1861 1862 instruct addF_imm(regF dst, immF con) %{ 1863 predicate((UseSSE>=1) && (UseAVX == 0)); 1864 match(Set dst (AddF dst con)); 1865 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1866 ins_cost(150); 1867 ins_encode %{ 1868 __ addss($dst$$XMMRegister, $constantaddress($con)); 1869 %} 1870 ins_pipe(pipe_slow); 1871 %} 1872 1873 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1874 predicate(UseAVX > 0); 1875 match(Set dst (AddF src1 src2)); 1876 1877 format %{ "vaddss $dst, $src1, $src2" %} 1878 ins_cost(150); 1879 ins_encode %{ 1880 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1881 %} 1882 ins_pipe(pipe_slow); 1883 %} 1884 1885 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1886 predicate(UseAVX > 0); 1887 match(Set dst (AddF src1 (LoadF src2))); 1888 1889 format %{ "vaddss $dst, $src1, $src2" %} 1890 ins_cost(150); 1891 ins_encode %{ 1892 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1893 %} 1894 ins_pipe(pipe_slow); 1895 %} 1896 1897 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1898 predicate(UseAVX > 0); 1899 match(Set dst (AddF src con)); 1900 1901 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1902 ins_cost(150); 1903 ins_encode %{ 1904 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1905 %} 1906 ins_pipe(pipe_slow); 1907 %} 1908 1909 instruct addD_reg(regD dst, regD src) %{ 1910 predicate((UseSSE>=2) && (UseAVX == 0)); 1911 match(Set dst (AddD dst src)); 1912 1913 format %{ "addsd $dst, $src" %} 1914 ins_cost(150); 1915 ins_encode %{ 1916 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1917 %} 1918 ins_pipe(pipe_slow); 1919 %} 1920 1921 instruct addD_mem(regD dst, memory src) %{ 1922 predicate((UseSSE>=2) && (UseAVX == 0)); 1923 match(Set dst (AddD dst (LoadD src))); 1924 1925 
format %{ "addsd $dst, $src" %} 1926 ins_cost(150); 1927 ins_encode %{ 1928 __ addsd($dst$$XMMRegister, $src$$Address); 1929 %} 1930 ins_pipe(pipe_slow); 1931 %} 1932 1933 instruct addD_imm(regD dst, immD con) %{ 1934 predicate((UseSSE>=2) && (UseAVX == 0)); 1935 match(Set dst (AddD dst con)); 1936 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1937 ins_cost(150); 1938 ins_encode %{ 1939 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1940 %} 1941 ins_pipe(pipe_slow); 1942 %} 1943 1944 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1945 predicate(UseAVX > 0); 1946 match(Set dst (AddD src1 src2)); 1947 1948 format %{ "vaddsd $dst, $src1, $src2" %} 1949 ins_cost(150); 1950 ins_encode %{ 1951 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1952 %} 1953 ins_pipe(pipe_slow); 1954 %} 1955 1956 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1957 predicate(UseAVX > 0); 1958 match(Set dst (AddD src1 (LoadD src2))); 1959 1960 format %{ "vaddsd $dst, $src1, $src2" %} 1961 ins_cost(150); 1962 ins_encode %{ 1963 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1964 %} 1965 ins_pipe(pipe_slow); 1966 %} 1967 1968 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1969 predicate(UseAVX > 0); 1970 match(Set dst (AddD src con)); 1971 1972 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1973 ins_cost(150); 1974 ins_encode %{ 1975 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1976 %} 1977 ins_pipe(pipe_slow); 1978 %} 1979 1980 instruct subF_reg(regF dst, regF src) %{ 1981 predicate((UseSSE>=1) && (UseAVX == 0)); 1982 match(Set dst (SubF dst src)); 1983 1984 format %{ "subss $dst, $src" %} 1985 ins_cost(150); 1986 ins_encode %{ 1987 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1988 %} 1989 ins_pipe(pipe_slow); 1990 %} 1991 1992 instruct subF_mem(regF dst, memory src) %{ 1993 predicate((UseSSE>=1) && (UseAVX == 0)); 1994 match(Set dst (SubF dst (LoadF src))); 1995 1996 format %{ "subss $dst, $src" %} 1997 ins_cost(150); 1998 ins_encode %{ 1999 __ subss($dst$$XMMRegister, $src$$Address); 2000 %} 2001 ins_pipe(pipe_slow); 2002 %} 2003 2004 instruct subF_imm(regF dst, immF con) %{ 2005 predicate((UseSSE>=1) && (UseAVX == 0)); 2006 match(Set dst (SubF dst con)); 2007 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2008 ins_cost(150); 2009 ins_encode %{ 2010 __ subss($dst$$XMMRegister, $constantaddress($con)); 2011 %} 2012 ins_pipe(pipe_slow); 2013 %} 2014 2015 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2016 predicate(UseAVX > 0); 2017 match(Set dst (SubF src1 src2)); 2018 2019 format %{ "vsubss $dst, $src1, $src2" %} 2020 ins_cost(150); 2021 ins_encode %{ 2022 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2023 %} 2024 ins_pipe(pipe_slow); 2025 %} 2026 2027 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2028 predicate(UseAVX > 0); 2029 match(Set dst (SubF src1 (LoadF src2))); 2030 2031 format %{ "vsubss $dst, $src1, $src2" %} 2032 ins_cost(150); 2033 ins_encode %{ 2034 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2035 %} 2036 ins_pipe(pipe_slow); 2037 %} 2038 2039 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2040 predicate(UseAVX > 0); 2041 match(Set dst (SubF src con)); 2042 2043 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2044 ins_cost(150); 2045 ins_encode %{ 2046 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2047 %} 2048 ins_pipe(pipe_slow); 2049 %} 2050 2051 instruct subD_reg(regD dst, regD src) %{ 2052 predicate((UseSSE>=2) && (UseAVX == 0)); 2053 match(Set dst (SubD dst src)); 2054 2055 format %{ "subsd $dst, $src" %} 2056 ins_cost(150); 2057 ins_encode %{ 2058 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2059 %} 2060 ins_pipe(pipe_slow); 2061 %} 2062 2063 instruct subD_mem(regD dst, memory src) %{ 2064 predicate((UseSSE>=2) && (UseAVX == 0)); 2065 match(Set dst (SubD dst (LoadD src))); 2066 2067 format %{ "subsd $dst, $src" %} 2068 ins_cost(150); 2069 ins_encode %{ 2070 __ subsd($dst$$XMMRegister, $src$$Address); 2071 %} 2072 ins_pipe(pipe_slow); 2073 %} 2074 2075 instruct subD_imm(regD dst, immD con) %{ 2076 predicate((UseSSE>=2) && (UseAVX == 0)); 2077 match(Set dst (SubD dst con)); 2078 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2079 ins_cost(150); 2080 ins_encode %{ 2081 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2082 %} 2083 ins_pipe(pipe_slow); 2084 %} 2085 2086 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2087 predicate(UseAVX > 0); 2088 match(Set dst (SubD src1 src2)); 2089 2090 format %{ "vsubsd $dst, $src1, $src2" %} 2091 ins_cost(150); 2092 ins_encode %{ 2093 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2094 %} 2095 ins_pipe(pipe_slow); 2096 %} 2097 2098 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2099 predicate(UseAVX > 0); 2100 match(Set dst (SubD src1 (LoadD src2))); 2101 2102 format %{ "vsubsd $dst, $src1, $src2" %} 2103 ins_cost(150); 2104 ins_encode %{ 2105 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2106 %} 2107 ins_pipe(pipe_slow); 2108 %} 2109 2110 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2111 predicate(UseAVX > 0); 2112 match(Set dst (SubD src con)); 2113 2114 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2115 ins_cost(150); 2116 ins_encode %{ 2117 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2118 %} 2119 ins_pipe(pipe_slow); 2120 %} 2121 2122 instruct mulF_reg(regF dst, regF src) %{ 2123 predicate((UseSSE>=1) && (UseAVX == 0)); 2124 match(Set dst (MulF dst src)); 2125 2126 format %{ "mulss $dst, $src" %} 2127 ins_cost(150); 2128 ins_encode %{ 2129 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2130 %} 2131 ins_pipe(pipe_slow); 2132 %} 2133 2134 instruct mulF_mem(regF dst, memory src) %{ 2135 predicate((UseSSE>=1) && (UseAVX == 0)); 2136 match(Set dst (MulF dst (LoadF src))); 2137 2138 format %{ "mulss $dst, $src" %} 2139 ins_cost(150); 2140 ins_encode %{ 2141 __ mulss($dst$$XMMRegister, $src$$Address); 2142 %} 2143 ins_pipe(pipe_slow); 2144 %} 2145 2146 instruct mulF_imm(regF dst, immF con) %{ 2147 predicate((UseSSE>=1) && (UseAVX == 0)); 2148 match(Set dst (MulF dst con)); 2149 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2150 ins_cost(150); 2151 ins_encode %{ 2152 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2153 %} 2154 ins_pipe(pipe_slow); 2155 %} 2156 2157 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2158 predicate(UseAVX > 0); 2159 match(Set dst (MulF src1 src2)); 2160 2161 format %{ "vmulss $dst, $src1, $src2" %} 2162 ins_cost(150); 2163 ins_encode %{ 2164 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2165 %} 2166 ins_pipe(pipe_slow); 2167 %} 2168 2169 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2170 predicate(UseAVX > 0); 2171 match(Set dst (MulF src1 (LoadF src2))); 2172 2173 format %{ "vmulss $dst, $src1, $src2" %} 2174 ins_cost(150); 2175 ins_encode %{ 2176 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2177 %} 2178 ins_pipe(pipe_slow); 2179 %} 2180 2181 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2182 predicate(UseAVX > 0); 2183 match(Set dst (MulF src con)); 2184 2185 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2186 ins_cost(150); 2187 ins_encode %{ 2188 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2189 %} 2190 ins_pipe(pipe_slow); 2191 %} 2192 2193 instruct mulD_reg(regD dst, regD src) %{ 2194 predicate((UseSSE>=2) && (UseAVX == 0)); 2195 match(Set dst (MulD dst src)); 2196 2197 format %{ "mulsd $dst, $src" %} 2198 ins_cost(150); 2199 ins_encode %{ 2200 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2201 %} 2202 ins_pipe(pipe_slow); 2203 %} 2204 2205 instruct mulD_mem(regD dst, memory src) %{ 2206 predicate((UseSSE>=2) && (UseAVX == 0)); 2207 match(Set dst (MulD dst (LoadD src))); 2208 2209 format %{ "mulsd $dst, $src" %} 2210 ins_cost(150); 2211 ins_encode %{ 2212 __ mulsd($dst$$XMMRegister, $src$$Address); 2213 %} 2214 ins_pipe(pipe_slow); 2215 %} 2216 2217 instruct mulD_imm(regD dst, immD con) %{ 2218 predicate((UseSSE>=2) && (UseAVX == 0)); 2219 match(Set dst (MulD dst con)); 2220 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2221 ins_cost(150); 2222 ins_encode %{ 2223 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2224 %} 2225 ins_pipe(pipe_slow); 2226 %} 2227 2228 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2229 predicate(UseAVX > 0); 2230 match(Set dst (MulD src1 src2)); 2231 2232 format %{ "vmulsd $dst, $src1, $src2" %} 2233 ins_cost(150); 2234 ins_encode %{ 2235 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2236 %} 2237 ins_pipe(pipe_slow); 2238 %} 2239 2240 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2241 predicate(UseAVX > 0); 2242 match(Set dst (MulD src1 (LoadD src2))); 2243 2244 format %{ "vmulsd $dst, $src1, $src2" %} 2245 ins_cost(150); 2246 ins_encode %{ 2247 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2248 %} 2249 ins_pipe(pipe_slow); 2250 %} 2251 2252 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2253 predicate(UseAVX > 0); 2254 match(Set dst (MulD src con)); 2255 2256 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2257 ins_cost(150); 2258 ins_encode %{ 2259 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2260 %} 2261 ins_pipe(pipe_slow); 2262 %} 2263 2264 instruct divF_reg(regF dst, regF src) %{ 2265 predicate((UseSSE>=1) && (UseAVX == 0)); 2266 match(Set dst (DivF dst src)); 2267 2268 format %{ "divss $dst, $src" %} 2269 ins_cost(150); 2270 ins_encode %{ 2271 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2272 %} 2273 ins_pipe(pipe_slow); 2274 %} 2275 2276 instruct divF_mem(regF dst, memory src) %{ 2277 predicate((UseSSE>=1) && (UseAVX == 0)); 2278 match(Set dst (DivF dst (LoadF src))); 2279 2280 format %{ "divss $dst, $src" %} 2281 ins_cost(150); 2282 ins_encode %{ 2283 __ divss($dst$$XMMRegister, $src$$Address); 2284 %} 2285 ins_pipe(pipe_slow); 2286 %} 2287 2288 instruct divF_imm(regF dst, immF con) %{ 2289 predicate((UseSSE>=1) && (UseAVX == 0)); 2290 match(Set dst (DivF dst con)); 2291 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2292 ins_cost(150); 2293 ins_encode %{ 2294 __ divss($dst$$XMMRegister, $constantaddress($con)); 2295 %} 2296 ins_pipe(pipe_slow); 2297 %} 2298 2299 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2300 predicate(UseAVX > 0); 2301 match(Set dst (DivF src1 src2)); 2302 2303 format %{ "vdivss $dst, $src1, $src2" %} 2304 ins_cost(150); 2305 ins_encode %{ 2306 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2307 %} 2308 ins_pipe(pipe_slow); 2309 %} 2310 2311 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2312 predicate(UseAVX > 0); 2313 match(Set dst (DivF src1 (LoadF src2))); 2314 2315 format %{ "vdivss $dst, $src1, $src2" %} 2316 ins_cost(150); 2317 ins_encode %{ 2318 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2319 %} 2320 ins_pipe(pipe_slow); 2321 %} 2322 2323 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2324 predicate(UseAVX > 0); 2325 match(Set dst (DivF src con)); 2326 2327 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2328 ins_cost(150); 2329 ins_encode %{ 2330 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2331 %} 2332 ins_pipe(pipe_slow); 2333 %} 2334 2335 instruct divD_reg(regD dst, regD src) %{ 2336 predicate((UseSSE>=2) && (UseAVX == 0)); 2337 match(Set dst (DivD dst src)); 2338 2339 format %{ "divsd $dst, $src" %} 2340 ins_cost(150); 2341 ins_encode %{ 2342 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2343 %} 2344 ins_pipe(pipe_slow); 2345 %} 2346 2347 instruct divD_mem(regD dst, memory src) %{ 2348 predicate((UseSSE>=2) && (UseAVX == 0)); 2349 match(Set dst (DivD dst (LoadD src))); 2350 2351 format %{ "divsd $dst, $src" %} 2352 ins_cost(150); 2353 ins_encode %{ 2354 __ divsd($dst$$XMMRegister, $src$$Address); 2355 %} 2356 ins_pipe(pipe_slow); 2357 %} 2358 2359 instruct divD_imm(regD dst, immD con) %{ 2360 predicate((UseSSE>=2) && (UseAVX == 0)); 2361 match(Set dst (DivD dst con)); 2362 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2363 ins_cost(150); 2364 ins_encode %{ 2365 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2366 %} 2367 ins_pipe(pipe_slow); 2368 %} 2369 2370 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2371 predicate(UseAVX > 0); 2372 match(Set dst (DivD src1 src2)); 2373 2374 format %{ "vdivsd $dst, $src1, $src2" %} 2375 ins_cost(150); 2376 ins_encode %{ 2377 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2378 %} 2379 ins_pipe(pipe_slow); 2380 %} 2381 2382 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2383 predicate(UseAVX > 0); 2384 match(Set dst (DivD src1 (LoadD src2))); 2385 2386 format %{ "vdivsd $dst, $src1, $src2" %} 2387 ins_cost(150); 2388 ins_encode %{ 2389 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2390 %} 2391 ins_pipe(pipe_slow); 2392 %} 2393 2394 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2395 predicate(UseAVX > 0); 2396 match(Set dst (DivD src con)); 2397 2398 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2399 ins_cost(150); 2400 ins_encode %{ 2401 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2402 %} 2403 ins_pipe(pipe_slow); 2404 %} 2405 2406 instruct absF_reg(regF dst) %{ 2407 predicate((UseSSE>=1) && (UseAVX == 0)); 2408 match(Set dst (AbsF dst)); 2409 ins_cost(150); 2410 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss  $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd  $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss  $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss  $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd  $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd  $dst,
$src" %} 2643 ins_cost(150); 2644 ins_encode %{ 2645 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2646 %} 2647 ins_pipe(pipe_slow); 2648 %} 2649 2650 instruct sqrtD_imm(regD dst, immD con) %{ 2651 predicate(UseSSE>=2); 2652 match(Set dst (SqrtD con)); 2653 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2654 ins_cost(150); 2655 ins_encode %{ 2656 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2657 %} 2658 ins_pipe(pipe_slow); 2659 %} 2660 2661 instruct onspinwait() %{ 2662 match(OnSpinWait); 2663 ins_cost(200); 2664 2665 format %{ 2666 $$template 2667 if (os::is_MP()) { 2668 $$emit$$"pause\t! membar_onspinwait" 2669 } else { 2670 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2671 } 2672 %} 2673 ins_encode %{ 2674 __ pause(); 2675 %} 2676 ins_pipe(pipe_slow); 2677 %} 2678 2679 // a * b + c 2680 instruct fmaD_reg(regD a, regD b, regD c) %{ 2681 predicate(UseFMA); 2682 match(Set c (FmaD c (Binary a b))); 2683 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2684 ins_cost(150); 2685 ins_encode %{ 2686 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2687 %} 2688 ins_pipe( pipe_slow ); 2689 %} 2690 2691 // a * b + c 2692 instruct fmaF_reg(regF a, regF b, regF c) %{ 2693 predicate(UseFMA); 2694 match(Set c (FmaF c (Binary a b))); 2695 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2696 ins_cost(150); 2697 ins_encode %{ 2698 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2699 %} 2700 ins_pipe( pipe_slow ); 2701 %} 2702 2703 // ====================VECTOR INSTRUCTIONS===================================== 2704 2705 // Load vectors (4 bytes long) 2706 instruct loadV4(vecS dst, memory mem) %{ 2707 predicate(n->as_LoadVector()->memory_size() == 4); 2708 match(Set dst (LoadVector mem)); 2709 ins_cost(125); 2710 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2711 ins_encode %{ 2712 __ movdl($dst$$XMMRegister, $mem$$Address); 2713 %} 2714 ins_pipe( pipe_slow ); 2715 %} 2716 2717 // Load vectors (8 bytes long) 2718 instruct loadV8(vecD dst, memory mem) %{ 2719 predicate(n->as_LoadVector()->memory_size() == 8); 2720 match(Set dst (LoadVector mem)); 2721 ins_cost(125); 2722 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2723 ins_encode %{ 2724 __ movq($dst$$XMMRegister, $mem$$Address); 2725 %} 2726 ins_pipe( pipe_slow ); 2727 %} 2728 2729 // Load vectors (16 bytes long) 2730 instruct loadV16(vecX dst, memory mem) %{ 2731 predicate(n->as_LoadVector()->memory_size() == 16); 2732 match(Set dst (LoadVector mem)); 2733 ins_cost(125); 2734 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2735 ins_encode %{ 2736 __ movdqu($dst$$XMMRegister, $mem$$Address); 2737 %} 2738 ins_pipe( pipe_slow ); 2739 %} 2740 2741 // Load vectors (32 bytes long) 2742 instruct loadV32(vecY dst, memory mem) %{ 2743 predicate(n->as_LoadVector()->memory_size() == 32); 2744 match(Set dst (LoadVector mem)); 2745 ins_cost(125); 2746 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2747 ins_encode %{ 2748 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2749 %} 2750 ins_pipe( pipe_slow ); 2751 %} 2752 2753 // Load vectors (64 bytes long) 2754 instruct loadV64_dword(vecZ dst, memory mem) %{ 2755 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 2756 match(Set dst (LoadVector mem)); 2757 ins_cost(125); 2758 format %{ "vmovdqul $dst k0,$mem\t! 
load vector (64 bytes)" %} 2759 ins_encode %{ 2760 int vector_len = 2; 2761 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 2762 %} 2763 ins_pipe( pipe_slow ); 2764 %} 2765 2766 // Load vectors (64 bytes long) 2767 instruct loadV64_qword(vecZ dst, memory mem) %{ 2768 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 2769 match(Set dst (LoadVector mem)); 2770 ins_cost(125); 2771 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 2772 ins_encode %{ 2773 int vector_len = 2; 2774 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 2775 %} 2776 ins_pipe( pipe_slow ); 2777 %} 2778 2779 // Store vectors 2780 instruct storeV4(memory mem, vecS src) %{ 2781 predicate(n->as_StoreVector()->memory_size() == 4); 2782 match(Set mem (StoreVector mem src)); 2783 ins_cost(145); 2784 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2785 ins_encode %{ 2786 __ movdl($mem$$Address, $src$$XMMRegister); 2787 %} 2788 ins_pipe( pipe_slow ); 2789 %} 2790 2791 instruct storeV8(memory mem, vecD src) %{ 2792 predicate(n->as_StoreVector()->memory_size() == 8); 2793 match(Set mem (StoreVector mem src)); 2794 ins_cost(145); 2795 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 2796 ins_encode %{ 2797 __ movq($mem$$Address, $src$$XMMRegister); 2798 %} 2799 ins_pipe( pipe_slow ); 2800 %} 2801 2802 instruct storeV16(memory mem, vecX src) %{ 2803 predicate(n->as_StoreVector()->memory_size() == 16); 2804 match(Set mem (StoreVector mem src)); 2805 ins_cost(145); 2806 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 2807 ins_encode %{ 2808 __ movdqu($mem$$Address, $src$$XMMRegister); 2809 %} 2810 ins_pipe( pipe_slow ); 2811 %} 2812 2813 instruct storeV32(memory mem, vecY src) %{ 2814 predicate(n->as_StoreVector()->memory_size() == 32); 2815 match(Set mem (StoreVector mem src)); 2816 ins_cost(145); 2817 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2818 ins_encode %{ 2819 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2820 %} 2821 ins_pipe( pipe_slow ); 2822 %} 2823 2824 instruct storeV64_dword(memory mem, vecZ src) %{ 2825 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 2826 match(Set mem (StoreVector mem src)); 2827 ins_cost(145); 2828 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 2829 ins_encode %{ 2830 int vector_len = 2; 2831 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 2832 %} 2833 ins_pipe( pipe_slow ); 2834 %} 2835 2836 instruct storeV64_qword(memory mem, vecZ src) %{ 2837 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 2838 match(Set mem (StoreVector mem src)); 2839 ins_cost(145); 2840 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 2841 ins_encode %{ 2842 int vector_len = 2; 2843 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 2844 %} 2845 ins_pipe( pipe_slow ); 2846 %} 2847 2848 // ====================LEGACY REPLICATE======================================= 2849 2850 instruct Repl4B_mem(vecS dst, memory mem) %{ 2851 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2852 match(Set dst (ReplicateB (LoadB mem))); 2853 format %{ "punpcklbw $dst,$mem\n\t" 2854 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 2855 ins_encode %{ 2856 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2857 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2858 %} 2859 ins_pipe( pipe_slow ); 2860 %} 2861 2862 instruct Repl8B_mem(vecD dst, memory mem) %{ 2863 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2864 match(Set dst (ReplicateB (LoadB mem))); 2865 format %{ "punpcklbw $dst,$mem\n\t" 2866 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 2867 ins_encode %{ 2868 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2869 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2870 %} 2871 ins_pipe( pipe_slow ); 2872 %} 2873 2874 instruct Repl16B(vecX dst, rRegI src) %{ 2875 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2876 match(Set dst (ReplicateB src)); 2877 format %{ "movd $dst,$src\n\t" 2878 "punpcklbw $dst,$dst\n\t" 2879 "pshuflw $dst,$dst,0x00\n\t" 2880 "punpcklqdq $dst,$dst\t! replicate16B" %} 2881 ins_encode %{ 2882 __ movdl($dst$$XMMRegister, $src$$Register); 2883 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2884 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2885 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2886 %} 2887 ins_pipe( pipe_slow ); 2888 %} 2889 2890 instruct Repl16B_mem(vecX dst, memory mem) %{ 2891 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2892 match(Set dst (ReplicateB (LoadB mem))); 2893 format %{ "punpcklbw $dst,$mem\n\t" 2894 "pshuflw $dst,$dst,0x00\n\t" 2895 "punpcklqdq $dst,$dst\t! replicate16B" %} 2896 ins_encode %{ 2897 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2898 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2899 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2900 %} 2901 ins_pipe( pipe_slow ); 2902 %} 2903 2904 instruct Repl32B(vecY dst, rRegI src) %{ 2905 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2906 match(Set dst (ReplicateB src)); 2907 format %{ "movd $dst,$src\n\t" 2908 "punpcklbw $dst,$dst\n\t" 2909 "pshuflw $dst,$dst,0x00\n\t" 2910 "punpcklqdq $dst,$dst\n\t" 2911 "vinserti128_high $dst,$dst\t! replicate32B" %} 2912 ins_encode %{ 2913 __ movdl($dst$$XMMRegister, $src$$Register); 2914 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2915 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2916 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2917 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2918 %} 2919 ins_pipe( pipe_slow ); 2920 %} 2921 2922 instruct Repl32B_mem(vecY dst, memory mem) %{ 2923 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2924 match(Set dst (ReplicateB (LoadB mem))); 2925 format %{ "punpcklbw $dst,$mem\n\t" 2926 "pshuflw $dst,$dst,0x00\n\t" 2927 "punpcklqdq $dst,$dst\n\t" 2928 "vinserti128_high $dst,$dst\t! replicate32B" %} 2929 ins_encode %{ 2930 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2931 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2932 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2933 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2934 %} 2935 ins_pipe( pipe_slow ); 2936 %} 2937 2938 instruct Repl16B_imm(vecX dst, immI con) %{ 2939 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2940 match(Set dst (ReplicateB con)); 2941 format %{ "movq $dst,[$constantaddress]\n\t" 2942 "punpcklqdq $dst,$dst\t! 
instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
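// The replicate4_imm/replicate8_imm helpers fold the scalar into a 32-bit
// or 64-bit constant-table entry ahead of time; their second argument is
// the element width in bytes. For example, replicate8_imm(0x41, 1)
// produces 0x4141414141414141, so a single movq from [$constantaddress]
// already materializes eight copies of the byte.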
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; 32-byte vectors are only enabled when AVX2 is present.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}
instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; 32-byte vectors are only enabled when AVX2 is present.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; 32-byte vectors are only enabled when AVX2 is present.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; 32-byte vectors are only enabled when AVX2 is present.
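    // vector_len encodes the vector width for the macro assembler:
    // 0 -> 128-bit (xmm), 1 -> 256-bit (ymm), 2 -> 512-bit (zmm).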
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================
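// With AVX-512 each replicate collapses to a single vpbroadcast*. Unlike
// AVX2, EVEX also allows broadcasting straight from a general purpose
// register (the Register-source evpbroadcastb/w/d/q forms below). The
// avx512vl predicate gates the 128/256-bit encodings, and avx512bw gates
// the byte/word forms.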
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // 512-bit vpxor (EVEX-encoded vpxord) requires AVX-512F, which UseAVX > 2 guarantees.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // 512-bit vpxor (EVEX-encoded vpxord) requires AVX-512F, which UseAVX > 2 guarantees.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // 512-bit vpxor (EVEX-encoded vpxord) requires AVX-512F, which UseAVX > 2 guarantees.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
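// The memory-source forms below differ only in vector_len: each emits a
// vpbroadcastq that splats the 64-bit memory operand across 2, 4 or 8 lanes.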
instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // 512-bit vpxor (EVEX-encoded vpxord) requires AVX-512F, which UseAVX > 2 guarantees.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: the EVEX encoding of vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: the EVEX encoding of vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: the EVEX encoding of vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: the EVEX encoding of vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: the EVEX encoding of vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: the EVEX encoding of vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: the EVEX encoding of vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================
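// Strategy for the integer add-reductions below:
//  - pure SSE (UseSSE > 2, UseAVX == 0): phaddd folds adjacent pairs,
//    repeated log2(n) times, then the scalar src1 is added in;
//  - AVX1/AVX2 only (supports_avxonly): the same shape using vphaddd;
//  - EVEX (UseAVX > 2): there is no 512-bit horizontal add, so the vector
//    is repeatedly halved (vextracti*_high, then pshufd 0xE to fold the
//    upper 64 bits and pshufd 0x1 to fold element 1) and re-added until a
//    single lane remains.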
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
add reduction4I" %} 4504 ins_encode %{ 4505 int vector_len = 0; 4506 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4507 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4508 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4509 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4510 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4511 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4512 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4513 %} 4514 ins_pipe( pipe_slow ); 4515 %} 4516 4517 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4518 predicate(VM_Version::supports_avxonly()); 4519 match(Set dst (AddReductionVI src1 src2)); 4520 effect(TEMP tmp, TEMP tmp2); 4521 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4522 "vphaddd $tmp,$tmp,$tmp2\n\t" 4523 "vextracti128_high $tmp2,$tmp\n\t" 4524 "vpaddd $tmp,$tmp,$tmp2\n\t" 4525 "movd $tmp2,$src1\n\t" 4526 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4527 "movd $dst,$tmp2\t! add reduction8I" %} 4528 ins_encode %{ 4529 int vector_len = 1; 4530 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4531 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4532 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4533 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4534 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4535 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4536 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4537 %} 4538 ins_pipe( pipe_slow ); 4539 %} 4540 4541 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4542 predicate(UseAVX > 2); 4543 match(Set dst (AddReductionVI src1 src2)); 4544 effect(TEMP tmp, TEMP tmp2); 4545 format %{ "vextracti128_high $tmp,$src2\n\t" 4546 "vpaddd $tmp,$tmp,$src2\n\t" 4547 "pshufd $tmp2,$tmp,0xE\n\t" 4548 "vpaddd $tmp,$tmp,$tmp2\n\t" 4549 "pshufd $tmp2,$tmp,0x1\n\t" 4550 "vpaddd $tmp,$tmp,$tmp2\n\t" 4551 "movd $tmp2,$src1\n\t" 4552 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4553 "movd $dst,$tmp2\t! add reduction8I" %} 4554 ins_encode %{ 4555 int vector_len = 0; 4556 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4557 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4558 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4559 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4560 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4561 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4562 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4563 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4564 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4565 %} 4566 ins_pipe( pipe_slow ); 4567 %} 4568 4569 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4570 predicate(UseAVX > 2); 4571 match(Set dst (AddReductionVI src1 src2)); 4572 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4573 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4574 "vpaddd $tmp3,$tmp3,$src2\n\t" 4575 "vextracti128_high $tmp,$tmp3\n\t" 4576 "vpaddd $tmp,$tmp,$tmp3\n\t" 4577 "pshufd $tmp2,$tmp,0xE\n\t" 4578 "vpaddd $tmp,$tmp,$tmp2\n\t" 4579 "pshufd $tmp2,$tmp,0x1\n\t" 4580 "vpaddd $tmp,$tmp,$tmp2\n\t" 4581 "movd $tmp2,$src1\n\t" 4582 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4583 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4584 ins_encode %{ 4585 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 4586 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4587 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 4588 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4589 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4590 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4591 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4592 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4593 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4594 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4595 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4596 %} 4597 ins_pipe( pipe_slow ); 4598 %} 4599 4600 #ifdef _LP64 4601 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4602 predicate(UseAVX > 2); 4603 match(Set dst (AddReductionVL src1 src2)); 4604 effect(TEMP tmp, TEMP tmp2); 4605 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4606 "vpaddq $tmp,$src2,$tmp2\n\t" 4607 "movdq $tmp2,$src1\n\t" 4608 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4609 "movdq $dst,$tmp2\t! add reduction2L" %} 4610 ins_encode %{ 4611 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4612 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4613 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4614 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4615 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4616 %} 4617 ins_pipe( pipe_slow ); 4618 %} 4619 4620 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4621 predicate(UseAVX > 2); 4622 match(Set dst (AddReductionVL src1 src2)); 4623 effect(TEMP tmp, TEMP tmp2); 4624 format %{ "vextracti128_high $tmp,$src2\n\t" 4625 "vpaddq $tmp2,$tmp,$src2\n\t" 4626 "pshufd $tmp,$tmp2,0xE\n\t" 4627 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4628 "movdq $tmp,$src1\n\t" 4629 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4630 "movdq $dst,$tmp2\t! add reduction4L" %} 4631 ins_encode %{ 4632 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4633 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4634 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4635 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4636 __ movdq($tmp$$XMMRegister, $src1$$Register); 4637 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4638 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4639 %} 4640 ins_pipe( pipe_slow ); 4641 %} 4642 4643 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4644 predicate(UseAVX > 2); 4645 match(Set dst (AddReductionVL src1 src2)); 4646 effect(TEMP tmp, TEMP tmp2); 4647 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 4648 "vpaddq $tmp2,$tmp2,$src2\n\t" 4649 "vextracti128_high $tmp,$tmp2\n\t" 4650 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4651 "pshufd $tmp,$tmp2,0xE\n\t" 4652 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4653 "movdq $tmp,$src1\n\t" 4654 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4655 "movdq $dst,$tmp2\t! 
add reduction8L" %} 4656 ins_encode %{ 4657 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4658 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4659 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 4660 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4661 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4662 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4663 __ movdq($tmp$$XMMRegister, $src1$$Register); 4664 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4665 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4666 %} 4667 ins_pipe( pipe_slow ); 4668 %} 4669 #endif 4670 4671 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4672 predicate(UseSSE >= 1 && UseAVX == 0); 4673 match(Set dst (AddReductionVF dst src2)); 4674 effect(TEMP dst, TEMP tmp); 4675 format %{ "addss $dst,$src2\n\t" 4676 "pshufd $tmp,$src2,0x01\n\t" 4677 "addss $dst,$tmp\t! add reduction2F" %} 4678 ins_encode %{ 4679 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4680 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4681 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4682 %} 4683 ins_pipe( pipe_slow ); 4684 %} 4685 4686 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4687 predicate(UseAVX > 0); 4688 match(Set dst (AddReductionVF dst src2)); 4689 effect(TEMP dst, TEMP tmp); 4690 format %{ "vaddss $dst,$dst,$src2\n\t" 4691 "pshufd $tmp,$src2,0x01\n\t" 4692 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 4693 ins_encode %{ 4694 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4695 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4696 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4697 %} 4698 ins_pipe( pipe_slow ); 4699 %} 4700 4701 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4702 predicate(UseSSE >= 1 && UseAVX == 0); 4703 match(Set dst (AddReductionVF dst src2)); 4704 effect(TEMP dst, TEMP tmp); 4705 format %{ "addss $dst,$src2\n\t" 4706 "pshufd $tmp,$src2,0x01\n\t" 4707 "addss $dst,$tmp\n\t" 4708 "pshufd $tmp,$src2,0x02\n\t" 4709 "addss $dst,$tmp\n\t" 4710 "pshufd $tmp,$src2,0x03\n\t" 4711 "addss $dst,$tmp\t! add reduction4F" %} 4712 ins_encode %{ 4713 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4714 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4715 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4716 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4717 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4718 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4719 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4720 %} 4721 ins_pipe( pipe_slow ); 4722 %} 4723 4724 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4725 predicate(UseAVX > 0); 4726 match(Set dst (AddReductionVF dst src2)); 4727 effect(TEMP tmp, TEMP dst); 4728 format %{ "vaddss $dst,dst,$src2\n\t" 4729 "pshufd $tmp,$src2,0x01\n\t" 4730 "vaddss $dst,$dst,$tmp\n\t" 4731 "pshufd $tmp,$src2,0x02\n\t" 4732 "vaddss $dst,$dst,$tmp\n\t" 4733 "pshufd $tmp,$src2,0x03\n\t" 4734 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 4735 ins_encode %{ 4736 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4737 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4738 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4739 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4740 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4741 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4742 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4743 %} 4744 ins_pipe( pipe_slow ); 4745 %} 4746 4747 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 4748 predicate(UseAVX > 0); 4749 match(Set dst (AddReductionVF dst src2)); 4750 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4751 format %{ "vaddss $dst,$dst,$src2\n\t" 4752 "pshufd $tmp,$src2,0x01\n\t" 4753 "vaddss $dst,$dst,$tmp\n\t" 4754 "pshufd $tmp,$src2,0x02\n\t" 4755 "vaddss $dst,$dst,$tmp\n\t" 4756 "pshufd $tmp,$src2,0x03\n\t" 4757 "vaddss $dst,$dst,$tmp\n\t" 4758 "vextractf128_high $tmp2,$src2\n\t" 4759 "vaddss $dst,$dst,$tmp2\n\t" 4760 "pshufd $tmp,$tmp2,0x01\n\t" 4761 "vaddss $dst,$dst,$tmp\n\t" 4762 "pshufd $tmp,$tmp2,0x02\n\t" 4763 "vaddss $dst,$dst,$tmp\n\t" 4764 "pshufd $tmp,$tmp2,0x03\n\t" 4765 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 4766 ins_encode %{ 4767 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4768 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4769 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4770 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4771 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4772 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4773 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4774 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4775 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4776 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4777 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4778 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4779 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4780 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4781 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4782 %} 4783 ins_pipe( pipe_slow ); 4784 %} 4785 4786 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 4787 predicate(UseAVX > 2); 4788 match(Set dst (AddReductionVF dst src2)); 4789 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4790 format %{ "vaddss $dst,$dst,$src2\n\t" 4791 "pshufd $tmp,$src2,0x01\n\t" 4792 "vaddss $dst,$dst,$tmp\n\t" 4793 "pshufd $tmp,$src2,0x02\n\t" 4794 "vaddss $dst,$dst,$tmp\n\t" 4795 "pshufd $tmp,$src2,0x03\n\t" 4796 "vaddss $dst,$dst,$tmp\n\t" 4797 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4798 "vaddss $dst,$dst,$tmp2\n\t" 4799 "pshufd $tmp,$tmp2,0x01\n\t" 4800 "vaddss $dst,$dst,$tmp\n\t" 4801 "pshufd $tmp,$tmp2,0x02\n\t" 4802 "vaddss $dst,$dst,$tmp\n\t" 4803 "pshufd $tmp,$tmp2,0x03\n\t" 4804 "vaddss $dst,$dst,$tmp\n\t" 4805 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4806 "vaddss $dst,$dst,$tmp2\n\t" 4807 "pshufd $tmp,$tmp2,0x01\n\t" 4808 "vaddss $dst,$dst,$tmp\n\t" 4809 "pshufd $tmp,$tmp2,0x02\n\t" 4810 "vaddss $dst,$dst,$tmp\n\t" 4811 "pshufd $tmp,$tmp2,0x03\n\t" 4812 "vaddss $dst,$dst,$tmp\n\t" 4813 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4814 "vaddss $dst,$dst,$tmp2\n\t" 4815 "pshufd $tmp,$tmp2,0x01\n\t" 4816 "vaddss $dst,$dst,$tmp\n\t" 4817 "pshufd 
$tmp,$tmp2,0x02\n\t" 4818 "vaddss $dst,$dst,$tmp\n\t" 4819 "pshufd $tmp,$tmp2,0x03\n\t" 4820 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 4821 ins_encode %{ 4822 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4823 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4824 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4825 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4826 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4827 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4828 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4829 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4830 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4831 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4832 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4833 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4834 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4835 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4836 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4837 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4838 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4839 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4840 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4841 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4842 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4843 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4844 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4845 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4846 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4847 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4848 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4849 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4850 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4851 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4852 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4853 %} 4854 ins_pipe( pipe_slow ); 4855 %} 4856 4857 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4858 predicate(UseSSE >= 1 && UseAVX == 0); 4859 match(Set dst (AddReductionVD dst src2)); 4860 effect(TEMP tmp, TEMP dst); 4861 format %{ "addsd $dst,$src2\n\t" 4862 "pshufd $tmp,$src2,0xE\n\t" 4863 "addsd $dst,$tmp\t! add reduction2D" %} 4864 ins_encode %{ 4865 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 4866 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4867 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 4868 %} 4869 ins_pipe( pipe_slow ); 4870 %} 4871 4872 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4873 predicate(UseAVX > 0); 4874 match(Set dst (AddReductionVD dst src2)); 4875 effect(TEMP tmp, TEMP dst); 4876 format %{ "vaddsd $dst,$dst,$src2\n\t" 4877 "pshufd $tmp,$src2,0xE\n\t" 4878 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 4879 ins_encode %{ 4880 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4881 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4882 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4883 %} 4884 ins_pipe( pipe_slow ); 4885 %} 4886 4887 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 4888 predicate(UseAVX > 0); 4889 match(Set dst (AddReductionVD dst src2)); 4890 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4891 format %{ "vaddsd $dst,$dst,$src2\n\t" 4892 "pshufd $tmp,$src2,0xE\n\t" 4893 "vaddsd $dst,$dst,$tmp\n\t" 4894 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4895 "vaddsd $dst,$dst,$tmp2\n\t" 4896 "pshufd $tmp,$tmp2,0xE\n\t" 4897 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 4898 ins_encode %{ 4899 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4900 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4901 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4902 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4903 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4904 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4905 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4906 %} 4907 ins_pipe( pipe_slow ); 4908 %} 4909 4910 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 4911 predicate(UseAVX > 2); 4912 match(Set dst (AddReductionVD dst src2)); 4913 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4914 format %{ "vaddsd $dst,$dst,$src2\n\t" 4915 "pshufd $tmp,$src2,0xE\n\t" 4916 "vaddsd $dst,$dst,$tmp\n\t" 4917 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4918 "vaddsd $dst,$dst,$tmp2\n\t" 4919 "pshufd $tmp,$tmp2,0xE\n\t" 4920 "vaddsd $dst,$dst,$tmp\n\t" 4921 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4922 "vaddsd $dst,$dst,$tmp2\n\t" 4923 "pshufd $tmp,$tmp2,0xE\n\t" 4924 "vaddsd $dst,$dst,$tmp\n\t" 4925 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4926 "vaddsd $dst,$dst,$tmp2\n\t" 4927 "pshufd $tmp,$tmp2,0xE\n\t" 4928 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 4929 ins_encode %{ 4930 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4931 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4932 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4933 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4934 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4935 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4936 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4937 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4938 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4939 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4940 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4941 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4942 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4943 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4944 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4945 %} 4946 ins_pipe( pipe_slow ); 4947 %} 4948 4949 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4950 predicate(UseSSE > 3 && UseAVX == 0); 4951 match(Set dst (MulReductionVI src1 src2)); 4952 effect(TEMP tmp, TEMP tmp2); 4953 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4954 "pmulld $tmp2,$src2\n\t" 4955 "movd $tmp,$src1\n\t" 4956 "pmulld $tmp2,$tmp\n\t" 4957 "movd $dst,$tmp2\t! 
mul reduction2I" %} 4958 ins_encode %{ 4959 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4960 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4961 __ movdl($tmp$$XMMRegister, $src1$$Register); 4962 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4963 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4964 %} 4965 ins_pipe( pipe_slow ); 4966 %} 4967 4968 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4969 predicate(UseAVX > 0); 4970 match(Set dst (MulReductionVI src1 src2)); 4971 effect(TEMP tmp, TEMP tmp2); 4972 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4973 "vpmulld $tmp,$src2,$tmp2\n\t" 4974 "movd $tmp2,$src1\n\t" 4975 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4976 "movd $dst,$tmp2\t! mul reduction2I" %} 4977 ins_encode %{ 4978 int vector_len = 0; 4979 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4980 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4981 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4982 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4983 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4984 %} 4985 ins_pipe( pipe_slow ); 4986 %} 4987 4988 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4989 predicate(UseSSE > 3 && UseAVX == 0); 4990 match(Set dst (MulReductionVI src1 src2)); 4991 effect(TEMP tmp, TEMP tmp2); 4992 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4993 "pmulld $tmp2,$src2\n\t" 4994 "pshufd $tmp,$tmp2,0x1\n\t" 4995 "pmulld $tmp2,$tmp\n\t" 4996 "movd $tmp,$src1\n\t" 4997 "pmulld $tmp2,$tmp\n\t" 4998 "movd $dst,$tmp2\t! mul reduction4I" %} 4999 ins_encode %{ 5000 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5001 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5002 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5003 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5004 __ movdl($tmp$$XMMRegister, $src1$$Register); 5005 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5006 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5007 %} 5008 ins_pipe( pipe_slow ); 5009 %} 5010 5011 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5012 predicate(UseAVX > 0); 5013 match(Set dst (MulReductionVI src1 src2)); 5014 effect(TEMP tmp, TEMP tmp2); 5015 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5016 "vpmulld $tmp,$src2,$tmp2\n\t" 5017 "pshufd $tmp2,$tmp,0x1\n\t" 5018 "vpmulld $tmp,$tmp,$tmp2\n\t" 5019 "movd $tmp2,$src1\n\t" 5020 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5021 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5022 ins_encode %{ 5023 int vector_len = 0; 5024 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5025 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5026 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5027 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5028 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5029 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5030 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5031 %} 5032 ins_pipe( pipe_slow ); 5033 %} 5034 5035 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5036 predicate(UseAVX > 0); 5037 match(Set dst (MulReductionVI src1 src2)); 5038 effect(TEMP tmp, TEMP tmp2); 5039 format %{ "vextracti128_high $tmp,$src2\n\t" 5040 "vpmulld $tmp,$tmp,$src2\n\t" 5041 "pshufd $tmp2,$tmp,0xE\n\t" 5042 "vpmulld $tmp,$tmp,$tmp2\n\t" 5043 "pshufd $tmp2,$tmp,0x1\n\t" 5044 "vpmulld $tmp,$tmp,$tmp2\n\t" 5045 "movd $tmp2,$src1\n\t" 5046 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5047 "movd $dst,$tmp2\t! mul reduction8I" %} 5048 ins_encode %{ 5049 int vector_len = 0; 5050 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5051 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5052 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5053 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5054 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5055 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5056 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5057 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5058 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5059 %} 5060 ins_pipe( pipe_slow ); 5061 %} 5062 5063 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5064 predicate(UseAVX > 2); 5065 match(Set dst (MulReductionVI src1 src2)); 5066 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5067 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5068 "vpmulld $tmp3,$tmp3,$src2\n\t" 5069 "vextracti128_high $tmp,$tmp3\n\t" 5070 "vpmulld $tmp,$tmp,$src2\n\t" 5071 "pshufd $tmp2,$tmp,0xE\n\t" 5072 "vpmulld $tmp,$tmp,$tmp2\n\t" 5073 "pshufd $tmp2,$tmp,0x1\n\t" 5074 "vpmulld $tmp,$tmp,$tmp2\n\t" 5075 "movd $tmp2,$src1\n\t" 5076 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5077 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5078 ins_encode %{ 5079 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5080 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5081 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5082 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5083 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5084 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5085 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5086 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5087 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5088 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5089 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5090 %} 5091 ins_pipe( pipe_slow ); 5092 %} 5093 5094 #ifdef _LP64 5095 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5096 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5097 match(Set dst (MulReductionVL src1 src2)); 5098 effect(TEMP tmp, TEMP tmp2); 5099 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5100 "vpmullq $tmp,$src2,$tmp2\n\t" 5101 "movdq $tmp2,$src1\n\t" 5102 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5103 "movdq $dst,$tmp2\t! mul reduction2L" %} 5104 ins_encode %{ 5105 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5106 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5107 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5108 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5109 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5110 %} 5111 ins_pipe( pipe_slow ); 5112 %} 5113 5114 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5115 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5116 match(Set dst (MulReductionVL src1 src2)); 5117 effect(TEMP tmp, TEMP tmp2); 5118 format %{ "vextracti128_high $tmp,$src2\n\t" 5119 "vpmullq $tmp2,$tmp,$src2\n\t" 5120 "pshufd $tmp,$tmp2,0xE\n\t" 5121 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5122 "movdq $tmp,$src1\n\t" 5123 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5124 "movdq $dst,$tmp2\t! mul reduction4L" %} 5125 ins_encode %{ 5126 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5127 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5128 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5129 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5130 __ movdq($tmp$$XMMRegister, $src1$$Register); 5131 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5132 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5133 %} 5134 ins_pipe( pipe_slow ); 5135 %} 5136 5137 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5138 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5139 match(Set dst (MulReductionVL src1 src2)); 5140 effect(TEMP tmp, TEMP tmp2); 5141 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5142 "vpmullq $tmp2,$tmp2,$src2\n\t" 5143 "vextracti128_high $tmp,$tmp2\n\t" 5144 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5145 "pshufd $tmp,$tmp2,0xE\n\t" 5146 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5147 "movdq $tmp,$src1\n\t" 5148 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5149 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5150 ins_encode %{ 5151 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5152 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5153 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5154 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5155 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5156 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5157 __ movdq($tmp$$XMMRegister, $src1$$Register); 5158 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5159 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5160 %} 5161 ins_pipe( pipe_slow ); 5162 %} 5163 #endif 5164 5165 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5166 predicate(UseSSE >= 1 && UseAVX == 0); 5167 match(Set dst (MulReductionVF dst src2)); 5168 effect(TEMP dst, TEMP tmp); 5169 format %{ "mulss $dst,$src2\n\t" 5170 "pshufd $tmp,$src2,0x01\n\t" 5171 "mulss $dst,$tmp\t! mul reduction2F" %} 5172 ins_encode %{ 5173 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5174 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5175 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5176 %} 5177 ins_pipe( pipe_slow ); 5178 %} 5179 5180 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5181 predicate(UseAVX > 0); 5182 match(Set dst (MulReductionVF dst src2)); 5183 effect(TEMP tmp, TEMP dst); 5184 format %{ "vmulss $dst,$dst,$src2\n\t" 5185 "pshufd $tmp,$src2,0x01\n\t" 5186 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5187 ins_encode %{ 5188 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5189 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5190 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5191 %} 5192 ins_pipe( pipe_slow ); 5193 %} 5194 5195 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5196 predicate(UseSSE >= 1 && UseAVX == 0); 5197 match(Set dst (MulReductionVF dst src2)); 5198 effect(TEMP dst, TEMP tmp); 5199 format %{ "mulss $dst,$src2\n\t" 5200 "pshufd $tmp,$src2,0x01\n\t" 5201 "mulss $dst,$tmp\n\t" 5202 "pshufd $tmp,$src2,0x02\n\t" 5203 "mulss $dst,$tmp\n\t" 5204 "pshufd $tmp,$src2,0x03\n\t" 5205 "mulss $dst,$tmp\t! mul reduction4F" %} 5206 ins_encode %{ 5207 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5208 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5209 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5210 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5211 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5212 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5213 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5214 %} 5215 ins_pipe( pipe_slow ); 5216 %} 5217 5218 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5219 predicate(UseAVX > 0); 5220 match(Set dst (MulReductionVF dst src2)); 5221 effect(TEMP tmp, TEMP dst); 5222 format %{ "vmulss $dst,$dst,$src2\n\t" 5223 "pshufd $tmp,$src2,0x01\n\t" 5224 "vmulss $dst,$dst,$tmp\n\t" 5225 "pshufd $tmp,$src2,0x02\n\t" 5226 "vmulss $dst,$dst,$tmp\n\t" 5227 "pshufd $tmp,$src2,0x03\n\t" 5228 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5229 ins_encode %{ 5230 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5231 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5232 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5233 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5234 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5235 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5236 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5237 %} 5238 ins_pipe( pipe_slow ); 5239 %} 5240 5241 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5242 predicate(UseAVX > 0); 5243 match(Set dst (MulReductionVF dst src2)); 5244 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5245 format %{ "vmulss $dst,$dst,$src2\n\t" 5246 "pshufd $tmp,$src2,0x01\n\t" 5247 "vmulss $dst,$dst,$tmp\n\t" 5248 "pshufd $tmp,$src2,0x02\n\t" 5249 "vmulss $dst,$dst,$tmp\n\t" 5250 "pshufd $tmp,$src2,0x03\n\t" 5251 "vmulss $dst,$dst,$tmp\n\t" 5252 "vextractf128_high $tmp2,$src2\n\t" 5253 "vmulss $dst,$dst,$tmp2\n\t" 5254 "pshufd $tmp,$tmp2,0x01\n\t" 5255 "vmulss $dst,$dst,$tmp\n\t" 5256 "pshufd $tmp,$tmp2,0x02\n\t" 5257 "vmulss $dst,$dst,$tmp\n\t" 5258 "pshufd $tmp,$tmp2,0x03\n\t" 5259 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5260 ins_encode %{ 5261 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5262 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5263 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5264 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5265 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5266 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5267 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5268 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5269 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5270 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5271 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5272 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5273 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5274 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5275 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5276 %} 5277 ins_pipe( pipe_slow ); 5278 %} 5279 5280 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5281 predicate(UseAVX > 2); 5282 match(Set dst (MulReductionVF dst src2)); 5283 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5284 format %{ "vmulss $dst,$dst,$src2\n\t" 5285 "pshufd $tmp,$src2,0x01\n\t" 5286 "vmulss $dst,$dst,$tmp\n\t" 5287 "pshufd $tmp,$src2,0x02\n\t" 5288 "vmulss $dst,$dst,$tmp\n\t" 5289 "pshufd $tmp,$src2,0x03\n\t" 5290 "vmulss $dst,$dst,$tmp\n\t" 5291 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5292 "vmulss $dst,$dst,$tmp2\n\t" 5293 "pshufd $tmp,$tmp2,0x01\n\t" 5294 "vmulss $dst,$dst,$tmp\n\t" 5295 "pshufd $tmp,$tmp2,0x02\n\t" 5296 "vmulss $dst,$dst,$tmp\n\t" 5297 "pshufd $tmp,$tmp2,0x03\n\t" 5298 "vmulss $dst,$dst,$tmp\n\t" 5299 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5300 "vmulss $dst,$dst,$tmp2\n\t" 5301 "pshufd $tmp,$tmp2,0x01\n\t" 5302 "vmulss $dst,$dst,$tmp\n\t" 5303 "pshufd $tmp,$tmp2,0x02\n\t" 5304 "vmulss $dst,$dst,$tmp\n\t" 5305 "pshufd $tmp,$tmp2,0x03\n\t" 5306 "vmulss $dst,$dst,$tmp\n\t" 5307 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5308 "vmulss $dst,$dst,$tmp2\n\t" 5309 "pshufd $tmp,$tmp2,0x01\n\t" 5310 "vmulss $dst,$dst,$tmp\n\t" 5311 "pshufd 
$tmp,$tmp2,0x02\n\t" 5312 "vmulss $dst,$dst,$tmp\n\t" 5313 "pshufd $tmp,$tmp2,0x03\n\t" 5314 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5315 ins_encode %{ 5316 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5317 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5318 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5319 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5320 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5321 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5322 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5323 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5324 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5325 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5326 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5327 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5328 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5329 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5330 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5331 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5332 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5333 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5334 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5335 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5336 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5337 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5338 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5339 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5340 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5341 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5342 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5343 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5344 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5345 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5346 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5347 %} 5348 ins_pipe( pipe_slow ); 5349 %} 5350 5351 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5352 predicate(UseSSE >= 1 && UseAVX == 0); 5353 match(Set dst (MulReductionVD dst src2)); 5354 effect(TEMP dst, TEMP tmp); 5355 format %{ "mulsd $dst,$src2\n\t" 5356 "pshufd $tmp,$src2,0xE\n\t" 5357 "mulsd $dst,$tmp\t! mul reduction2D" %} 5358 ins_encode %{ 5359 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5360 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5361 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5362 %} 5363 ins_pipe( pipe_slow ); 5364 %} 5365 5366 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5367 predicate(UseAVX > 0); 5368 match(Set dst (MulReductionVD dst src2)); 5369 effect(TEMP tmp, TEMP dst); 5370 format %{ "vmulsd $dst,$dst,$src2\n\t" 5371 "pshufd $tmp,$src2,0xE\n\t" 5372 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %} 5373 ins_encode %{ 5374 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5375 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5376 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5377 %} 5378 ins_pipe( pipe_slow ); 5379 %} 5380 5381 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5382 predicate(UseAVX > 0); 5383 match(Set dst (MulReductionVD dst src2)); 5384 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5385 format %{ "vmulsd $dst,$dst,$src2\n\t" 5386 "pshufd $tmp,$src2,0xE\n\t" 5387 "vmulsd $dst,$dst,$tmp\n\t" 5388 "vextractf128_high $tmp2,$src2\n\t" 5389 "vmulsd $dst,$dst,$tmp2\n\t" 5390 "pshufd $tmp,$tmp2,0xE\n\t" 5391 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5392 ins_encode %{ 5393 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5394 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5395 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5396 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5397 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5398 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5399 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5400 %} 5401 ins_pipe( pipe_slow ); 5402 %} 5403 5404 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5405 predicate(UseAVX > 2); 5406 match(Set dst (MulReductionVD dst src2)); 5407 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5408 format %{ "vmulsd $dst,$dst,$src2\n\t" 5409 "pshufd $tmp,$src2,0xE\n\t" 5410 "vmulsd $dst,$dst,$tmp\n\t" 5411 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5412 "vmulsd $dst,$dst,$tmp2\n\t" 5413 "pshufd $tmp,$src2,0xE\n\t" 5414 "vmulsd $dst,$dst,$tmp\n\t" 5415 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5416 "vmulsd $dst,$dst,$tmp2\n\t" 5417 "pshufd $tmp,$tmp2,0xE\n\t" 5418 "vmulsd $dst,$dst,$tmp\n\t" 5419 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5420 "vmulsd $dst,$dst,$tmp2\n\t" 5421 "pshufd $tmp,$tmp2,0xE\n\t" 5422 "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} 5423 ins_encode %{ 5424 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5425 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5426 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5427 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5428 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5429 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5430 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5431 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5432 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5433 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5434 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5435 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5436 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5437 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5438 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5439 %} 5440 ins_pipe( pipe_slow ); 5441 %} 5442 5443 // ====================VECTOR ARITHMETIC======================================= 5444 5445 // --------------------------------- ADD -------------------------------------- 5446 5447 // Bytes vector add 5448 instruct vadd4B(vecS dst, vecS src) %{ 5449 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5450 match(Set dst (AddVB dst src)); 5451 format %{ "paddb $dst,$src\t! 
add packed4B" %} 5452 ins_encode %{ 5453 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5454 %} 5455 ins_pipe( pipe_slow ); 5456 %} 5457 5458 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5459 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5460 match(Set dst (AddVB src1 src2)); 5461 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5462 ins_encode %{ 5463 int vector_len = 0; 5464 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5465 %} 5466 ins_pipe( pipe_slow ); 5467 %} 5468 5469 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5470 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5471 match(Set dst (AddVB src1 src2)); 5472 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5473 ins_encode %{ 5474 int vector_len = 0; 5475 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5476 %} 5477 ins_pipe( pipe_slow ); 5478 %} 5479 5480 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5481 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5482 match(Set dst (AddVB dst src2)); 5483 effect(TEMP src1); 5484 format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} 5485 ins_encode %{ 5486 int vector_len = 0; 5487 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5488 %} 5489 ins_pipe( pipe_slow ); 5490 %} 5491 5492 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ 5493 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5494 match(Set dst (AddVB src (LoadVector mem))); 5495 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5496 ins_encode %{ 5497 int vector_len = 0; 5498 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5499 %} 5500 ins_pipe( pipe_slow ); 5501 %} 5502 5503 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ 5504 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5505 match(Set dst (AddVB src (LoadVector mem))); 5506 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5507 ins_encode %{ 5508 int vector_len = 0; 5509 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5510 %} 5511 ins_pipe( pipe_slow ); 5512 %} 5513 5514 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5515 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5516 match(Set dst (AddVB dst (LoadVector mem))); 5517 effect(TEMP src); 5518 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5519 ins_encode %{ 5520 int vector_len = 0; 5521 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5522 %} 5523 ins_pipe( pipe_slow ); 5524 %} 5525 5526 instruct vadd8B(vecD dst, vecD src) %{ 5527 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 5528 match(Set dst (AddVB dst src)); 5529 format %{ "paddb $dst,$src\t! add packed8B" %} 5530 ins_encode %{ 5531 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5532 %} 5533 ins_pipe( pipe_slow ); 5534 %} 5535 5536 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5537 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5538 match(Set dst (AddVB src1 src2)); 5539 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %} 5540 ins_encode %{ 5541 int vector_len = 0; 5542 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5543 %} 5544 ins_pipe( pipe_slow ); 5545 %} 5546 5547 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5548 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5549 match(Set dst (AddVB src1 src2)); 5550 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5551 ins_encode %{ 5552 int vector_len = 0; 5553 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5554 %} 5555 ins_pipe( pipe_slow ); 5556 %} 5557 5558 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5559 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5560 match(Set dst (AddVB dst src2)); 5561 effect(TEMP src1); 5562 format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} 5563 ins_encode %{ 5564 int vector_len = 0; 5565 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5566 %} 5567 ins_pipe( pipe_slow ); 5568 %} 5569 5570 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ 5571 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5572 match(Set dst (AddVB src (LoadVector mem))); 5573 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5574 ins_encode %{ 5575 int vector_len = 0; 5576 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5577 %} 5578 ins_pipe( pipe_slow ); 5579 %} 5580 5581 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ 5582 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5583 match(Set dst (AddVB src (LoadVector mem))); 5584 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5585 ins_encode %{ 5586 int vector_len = 0; 5587 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5588 %} 5589 ins_pipe( pipe_slow ); 5590 %} 5591 5592 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 5593 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5594 match(Set dst (AddVB dst (LoadVector mem))); 5595 effect(TEMP src); 5596 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5597 ins_encode %{ 5598 int vector_len = 0; 5599 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5600 %} 5601 ins_pipe( pipe_slow ); 5602 %} 5603 5604 instruct vadd16B(vecX dst, vecX src) %{ 5605 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 5606 match(Set dst (AddVB dst src)); 5607 format %{ "paddb $dst,$src\t! add packed16B" %} 5608 ins_encode %{ 5609 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5610 %} 5611 ins_pipe( pipe_slow ); 5612 %} 5613 5614 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 5615 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5616 match(Set dst (AddVB src1 src2)); 5617 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5618 ins_encode %{ 5619 int vector_len = 0; 5620 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5621 %} 5622 ins_pipe( pipe_slow ); 5623 %} 5624 5625 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 5626 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5627 match(Set dst (AddVB src1 src2)); 5628 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed16B" %} 5629 ins_encode %{ 5630 int vector_len = 0; 5631 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5632 %} 5633 ins_pipe( pipe_slow ); 5634 %} 5635 5636 instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 5637 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 5638 match(Set dst (AddVB dst src2)); 5639 effect(TEMP src1); 5640 format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %} 5641 ins_encode %{ 5642 int vector_len = 0; 5643 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5644 %} 5645 ins_pipe( pipe_slow ); 5646 %} 5647 5648 instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ 5649 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5650 match(Set dst (AddVB src (LoadVector mem))); 5651 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5652 ins_encode %{ 5653 int vector_len = 0; 5654 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5655 %} 5656 ins_pipe( pipe_slow ); 5657 %} 5658 5659 instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ 5660 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5661 match(Set dst (AddVB src (LoadVector mem))); 5662 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5663 ins_encode %{ 5664 int vector_len = 0; 5665 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5666 %} 5667 ins_pipe( pipe_slow ); 5668 %} 5669 5670 instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 5671 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5672 match(Set dst (AddVB dst (LoadVector mem))); 5673 effect(TEMP src); 5674 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5675 ins_encode %{ 5676 int vector_len = 0; 5677 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5678 %} 5679 ins_pipe( pipe_slow ); 5680 %} 5681 5682 instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 5683 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5684 match(Set dst (AddVB src1 src2)); 5685 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5686 ins_encode %{ 5687 int vector_len = 1; 5688 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5689 %} 5690 ins_pipe( pipe_slow ); 5691 %} 5692 5693 instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 5694 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5695 match(Set dst (AddVB src1 src2)); 5696 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5697 ins_encode %{ 5698 int vector_len = 1; 5699 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5700 %} 5701 ins_pipe( pipe_slow ); 5702 %} 5703 5704 instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 5705 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 5706 match(Set dst (AddVB dst src2)); 5707 effect(TEMP src1); 5708 format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %} 5709 ins_encode %{ 5710 int vector_len = 1; 5711 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5712 %} 5713 ins_pipe( pipe_slow ); 5714 %} 5715 5716 instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{ 5717 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5718 match(Set dst (AddVB src (LoadVector mem))); 5719 format %{ "vpaddb $dst,$src,$mem\t! 
add packed32B" %} 5720 ins_encode %{ 5721 int vector_len = 1; 5722 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5723 %} 5724 ins_pipe( pipe_slow ); 5725 %} 5726 5727 instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{ 5728 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5729 match(Set dst (AddVB src (LoadVector mem))); 5730 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5731 ins_encode %{ 5732 int vector_len = 1; 5733 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5734 %} 5735 ins_pipe( pipe_slow ); 5736 %} 5737 5738 instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 5739 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5740 match(Set dst (AddVB dst (LoadVector mem))); 5741 effect(TEMP src); 5742 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5743 ins_encode %{ 5744 int vector_len = 1; 5745 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5746 %} 5747 ins_pipe( pipe_slow ); 5748 %} 5749 5750 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5751 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 5752 match(Set dst (AddVB src1 src2)); 5753 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 5754 ins_encode %{ 5755 int vector_len = 2; 5756 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5757 %} 5758 ins_pipe( pipe_slow ); 5759 %} 5760 5761 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5762 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 5763 match(Set dst (AddVB src (LoadVector mem))); 5764 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5765 ins_encode %{ 5766 int vector_len = 2; 5767 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5768 %} 5769 ins_pipe( pipe_slow ); 5770 %} 5771 5772 // Shorts/Chars vector add 5773 instruct vadd2S(vecS dst, vecS src) %{ 5774 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 5775 match(Set dst (AddVS dst src)); 5776 format %{ "paddw $dst,$src\t! add packed2S" %} 5777 ins_encode %{ 5778 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5779 %} 5780 ins_pipe( pipe_slow ); 5781 %} 5782 5783 instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5784 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5785 match(Set dst (AddVS src1 src2)); 5786 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5787 ins_encode %{ 5788 int vector_len = 0; 5789 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5790 %} 5791 ins_pipe( pipe_slow ); 5792 %} 5793 5794 instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5795 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5796 match(Set dst (AddVS src1 src2)); 5797 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5798 ins_encode %{ 5799 int vector_len = 0; 5800 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5801 %} 5802 ins_pipe( pipe_slow ); 5803 %} 5804 5805 instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5806 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 5807 match(Set dst (AddVS dst src2)); 5808 effect(TEMP src1); 5809 format %{ "vpaddw $dst,$dst,$src2\t! 
add packed2S" %} 5810 ins_encode %{ 5811 int vector_len = 0; 5812 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5813 %} 5814 ins_pipe( pipe_slow ); 5815 %} 5816 5817 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ 5818 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5819 match(Set dst (AddVS src (LoadVector mem))); 5820 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5821 ins_encode %{ 5822 int vector_len = 0; 5823 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5824 %} 5825 ins_pipe( pipe_slow ); 5826 %} 5827 5828 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ 5829 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5830 match(Set dst (AddVS src (LoadVector mem))); 5831 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5832 ins_encode %{ 5833 int vector_len = 0; 5834 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5835 %} 5836 ins_pipe( pipe_slow ); 5837 %} 5838 5839 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5840 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5841 match(Set dst (AddVS dst (LoadVector mem))); 5842 effect(TEMP src); 5843 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5844 ins_encode %{ 5845 int vector_len = 0; 5846 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5847 %} 5848 ins_pipe( pipe_slow ); 5849 %} 5850 5851 instruct vadd4S(vecD dst, vecD src) %{ 5852 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5853 match(Set dst (AddVS dst src)); 5854 format %{ "paddw $dst,$src\t! add packed4S" %} 5855 ins_encode %{ 5856 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5857 %} 5858 ins_pipe( pipe_slow ); 5859 %} 5860 5861 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5862 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5863 match(Set dst (AddVS src1 src2)); 5864 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5865 ins_encode %{ 5866 int vector_len = 0; 5867 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5868 %} 5869 ins_pipe( pipe_slow ); 5870 %} 5871 5872 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5873 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5874 match(Set dst (AddVS src1 src2)); 5875 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5876 ins_encode %{ 5877 int vector_len = 0; 5878 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5879 %} 5880 ins_pipe( pipe_slow ); 5881 %} 5882 5883 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5884 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5885 match(Set dst (AddVS dst src2)); 5886 effect(TEMP src1); 5887 format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} 5888 ins_encode %{ 5889 int vector_len = 0; 5890 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5891 %} 5892 ins_pipe( pipe_slow ); 5893 %} 5894 5895 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ 5896 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5897 match(Set dst (AddVS src (LoadVector mem))); 5898 format %{ "vpaddw $dst,$src,$mem\t! 
instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
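// 512-bit short rules such as the one above require AVX512BW; there is no
// *_avx or *_special fallback at this width (likewise for packed64B).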
instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
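// Note: 256-bit integer adds require AVX2 (UseAVX > 1); AVX1 only provides
// 256-bit floating-point operations.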
instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
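// In the encodings above and below, vector_len selects the operand width the
// assembler emits: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.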
// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
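// Unlike the integer rules, 256-bit FP adds only need AVX1 (UseAVX > 0); the
// 512-bit forms still require UseAVX > 2.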
instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
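// Operand order matters for subtraction: psubb subtracts $src from $dst in
// place, while the AVX forms compute $src1 - $src2 (or $src - $mem) into $dst.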
instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
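// Illustrative example: a counted loop such as
//   for (int i = 0; i < n; i++) { a[i] -= b[i]; }
// is auto-vectorized by SuperWord into SubVI nodes, which the rules in this
// block match at the widest width the CPU supports.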
instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
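// 512-bit int/long subs need only AVX512F (UseAVX > 2): dword/qword lanes
// are part of the base EVEX set, so no BW check is required here.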
instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
sub packed8D" %} 7486 ins_encode %{ 7487 int vector_len = 2; 7488 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7489 %} 7490 ins_pipe( pipe_slow ); 7491 %} 7492 7493 // --------------------------------- MUL -------------------------------------- 7494 7495 // Shorts/Chars vector mul 7496 instruct vmul2S(vecS dst, vecS src) %{ 7497 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7498 match(Set dst (MulVS dst src)); 7499 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7500 ins_encode %{ 7501 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7502 %} 7503 ins_pipe( pipe_slow ); 7504 %} 7505 7506 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7507 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7508 match(Set dst (MulVS src1 src2)); 7509 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7510 ins_encode %{ 7511 int vector_len = 0; 7512 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7513 %} 7514 ins_pipe( pipe_slow ); 7515 %} 7516 7517 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7518 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7519 match(Set dst (MulVS src1 src2)); 7520 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7521 ins_encode %{ 7522 int vector_len = 0; 7523 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7524 %} 7525 ins_pipe( pipe_slow ); 7526 %} 7527 7528 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 7529 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7530 match(Set dst (MulVS dst src2)); 7531 effect(TEMP src1); 7532 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7533 ins_encode %{ 7534 int vector_len = 0; 7535 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7536 %} 7537 ins_pipe( pipe_slow ); 7538 %} 7539 7540 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7541 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7542 match(Set dst (MulVS src (LoadVector mem))); 7543 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7544 ins_encode %{ 7545 int vector_len = 0; 7546 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7547 %} 7548 ins_pipe( pipe_slow ); 7549 %} 7550 7551 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7552 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7553 match(Set dst (MulVS src (LoadVector mem))); 7554 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7555 ins_encode %{ 7556 int vector_len = 0; 7557 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7558 %} 7559 ins_pipe( pipe_slow ); 7560 %} 7561 7562 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7563 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7564 match(Set dst (MulVS dst (LoadVector mem))); 7565 effect(TEMP src); 7566 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7567 ins_encode %{ 7568 int vector_len = 0; 7569 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7570 %} 7571 ins_pipe( pipe_slow ); 7572 %} 7573 7574 instruct vmul4S(vecD dst, vecD src) %{ 7575 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7576 match(Set dst (MulVS dst src)); 7577 format %{ "pmullw $dst,$src\t! 
mul packed4S" %} 7578 ins_encode %{ 7579 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7580 %} 7581 ins_pipe( pipe_slow ); 7582 %} 7583 7584 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7585 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7586 match(Set dst (MulVS src1 src2)); 7587 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7588 ins_encode %{ 7589 int vector_len = 0; 7590 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7591 %} 7592 ins_pipe( pipe_slow ); 7593 %} 7594 7595 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7596 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7597 match(Set dst (MulVS src1 src2)); 7598 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7599 ins_encode %{ 7600 int vector_len = 0; 7601 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7602 %} 7603 ins_pipe( pipe_slow ); 7604 %} 7605 7606 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7607 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7608 match(Set dst (MulVS dst src2)); 7609 effect(TEMP src1); 7610 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7611 ins_encode %{ 7612 int vector_len = 0; 7613 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7614 %} 7615 ins_pipe( pipe_slow ); 7616 %} 7617 7618 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7619 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7620 match(Set dst (MulVS src (LoadVector mem))); 7621 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7622 ins_encode %{ 7623 int vector_len = 0; 7624 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7625 %} 7626 ins_pipe( pipe_slow ); 7627 %} 7628 7629 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7630 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7631 match(Set dst (MulVS src (LoadVector mem))); 7632 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7633 ins_encode %{ 7634 int vector_len = 0; 7635 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7636 %} 7637 ins_pipe( pipe_slow ); 7638 %} 7639 7640 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7641 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7642 match(Set dst (MulVS dst (LoadVector mem))); 7643 effect(TEMP src); 7644 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7645 ins_encode %{ 7646 int vector_len = 0; 7647 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7648 %} 7649 ins_pipe( pipe_slow ); 7650 %} 7651 7652 instruct vmul8S(vecX dst, vecX src) %{ 7653 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7654 match(Set dst (MulVS dst src)); 7655 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7656 ins_encode %{ 7657 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7658 %} 7659 ins_pipe( pipe_slow ); 7660 %} 7661 7662 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7663 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7664 match(Set dst (MulVS src1 src2)); 7665 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 7666 ins_encode %{ 7667 int vector_len = 0; 7668 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7669 %} 7670 ins_pipe( pipe_slow ); 7671 %} 7672 7673 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7674 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7675 match(Set dst (MulVS src1 src2)); 7676 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7677 ins_encode %{ 7678 int vector_len = 0; 7679 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7680 %} 7681 ins_pipe( pipe_slow ); 7682 %} 7683 7684 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7685 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7686 match(Set dst (MulVS dst src2)); 7687 effect(TEMP src1); 7688 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7689 ins_encode %{ 7690 int vector_len = 0; 7691 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7692 %} 7693 ins_pipe( pipe_slow ); 7694 %} 7695 7696 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7697 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7698 match(Set dst (MulVS src (LoadVector mem))); 7699 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7700 ins_encode %{ 7701 int vector_len = 0; 7702 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7703 %} 7704 ins_pipe( pipe_slow ); 7705 %} 7706 7707 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7708 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7709 match(Set dst (MulVS src (LoadVector mem))); 7710 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7711 ins_encode %{ 7712 int vector_len = 0; 7713 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7714 %} 7715 ins_pipe( pipe_slow ); 7716 %} 7717 7718 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7719 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7720 match(Set dst (MulVS dst (LoadVector mem))); 7721 effect(TEMP src); 7722 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7723 ins_encode %{ 7724 int vector_len = 0; 7725 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7726 %} 7727 ins_pipe( pipe_slow ); 7728 %} 7729 7730 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7731 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7732 match(Set dst (MulVS src1 src2)); 7733 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7734 ins_encode %{ 7735 int vector_len = 1; 7736 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7737 %} 7738 ins_pipe( pipe_slow ); 7739 %} 7740 7741 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7742 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7743 match(Set dst (MulVS src1 src2)); 7744 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7745 ins_encode %{ 7746 int vector_len = 1; 7747 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7748 %} 7749 ins_pipe( pipe_slow ); 7750 %} 7751 7752 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7753 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7754 match(Set dst (MulVS dst src2)); 7755 effect(TEMP src1); 7756 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %} 7757 ins_encode %{ 7758 int vector_len = 1; 7759 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7760 %} 7761 ins_pipe( pipe_slow ); 7762 %} 7763 7764 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7765 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7766 match(Set dst (MulVS src (LoadVector mem))); 7767 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7768 ins_encode %{ 7769 int vector_len = 1; 7770 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7771 %} 7772 ins_pipe( pipe_slow ); 7773 %} 7774 7775 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7776 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7777 match(Set dst (MulVS src (LoadVector mem))); 7778 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7779 ins_encode %{ 7780 int vector_len = 1; 7781 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7782 %} 7783 ins_pipe( pipe_slow ); 7784 %} 7785 7786 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7787 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7788 match(Set dst (MulVS dst (LoadVector mem))); 7789 effect(TEMP src); 7790 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7791 ins_encode %{ 7792 int vector_len = 1; 7793 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7794 %} 7795 ins_pipe( pipe_slow ); 7796 %} 7797 7798 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7799 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7800 match(Set dst (MulVS src1 src2)); 7801 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 7802 ins_encode %{ 7803 int vector_len = 2; 7804 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7805 %} 7806 ins_pipe( pipe_slow ); 7807 %} 7808 7809 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7810 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7811 match(Set dst (MulVS src (LoadVector mem))); 7812 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7813 ins_encode %{ 7814 int vector_len = 2; 7815 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7816 %} 7817 ins_pipe( pipe_slow ); 7818 %} 7819 7820 // Integers vector mul (sse4_1) 7821 instruct vmul2I(vecD dst, vecD src) %{ 7822 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7823 match(Set dst (MulVI dst src)); 7824 format %{ "pmulld $dst,$src\t! mul packed2I" %} 7825 ins_encode %{ 7826 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7827 %} 7828 ins_pipe( pipe_slow ); 7829 %} 7830 7831 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 7832 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7833 match(Set dst (MulVI src1 src2)); 7834 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 7835 ins_encode %{ 7836 int vector_len = 0; 7837 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7838 %} 7839 ins_pipe( pipe_slow ); 7840 %} 7841 7842 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 7843 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7844 match(Set dst (MulVI src (LoadVector mem))); 7845 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} 7846 ins_encode %{ 7847 int vector_len = 0; 7848 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7849 %} 7850 ins_pipe( pipe_slow ); 7851 %} 7852 7853 instruct vmul4I(vecX dst, vecX src) %{ 7854 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7855 match(Set dst (MulVI dst src)); 7856 format %{ "pmulld $dst,$src\t! mul packed4I" %} 7857 ins_encode %{ 7858 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7859 %} 7860 ins_pipe( pipe_slow ); 7861 %} 7862 7863 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 7864 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7865 match(Set dst (MulVI src1 src2)); 7866 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 7867 ins_encode %{ 7868 int vector_len = 0; 7869 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7870 %} 7871 ins_pipe( pipe_slow ); 7872 %} 7873 7874 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 7875 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7876 match(Set dst (MulVI src (LoadVector mem))); 7877 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 7878 ins_encode %{ 7879 int vector_len = 0; 7880 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7881 %} 7882 ins_pipe( pipe_slow ); 7883 %} 7884 7885 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 7886 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7887 match(Set dst (MulVL src1 src2)); 7888 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 7889 ins_encode %{ 7890 int vector_len = 0; 7891 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7892 %} 7893 ins_pipe( pipe_slow ); 7894 %} 7895 7896 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 7897 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7898 match(Set dst (MulVL src (LoadVector mem))); 7899 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 7900 ins_encode %{ 7901 int vector_len = 0; 7902 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7903 %} 7904 ins_pipe( pipe_slow ); 7905 %} 7906 7907 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 7908 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7909 match(Set dst (MulVL src1 src2)); 7910 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 7911 ins_encode %{ 7912 int vector_len = 1; 7913 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7914 %} 7915 ins_pipe( pipe_slow ); 7916 %} 7917 7918 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7919 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7920 match(Set dst (MulVL src (LoadVector mem))); 7921 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7922 ins_encode %{ 7923 int vector_len = 1; 7924 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7925 %} 7926 ins_pipe( pipe_slow ); 7927 %} 7928 7929 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7930 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7931 match(Set dst (MulVL src1 src2)); 7932 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed8L" %} 7933 ins_encode %{ 7934 int vector_len = 2; 7935 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7936 %} 7937 ins_pipe( pipe_slow ); 7938 %} 7939 7940 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7941 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7942 match(Set dst (MulVL src (LoadVector mem))); 7943 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7944 ins_encode %{ 7945 int vector_len = 2; 7946 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7947 %} 7948 ins_pipe( pipe_slow ); 7949 %} 7950 7951 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7952 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7953 match(Set dst (MulVI src1 src2)); 7954 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7955 ins_encode %{ 7956 int vector_len = 1; 7957 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7958 %} 7959 ins_pipe( pipe_slow ); 7960 %} 7961 7962 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7963 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7964 match(Set dst (MulVI src (LoadVector mem))); 7965 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7966 ins_encode %{ 7967 int vector_len = 1; 7968 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7969 %} 7970 ins_pipe( pipe_slow ); 7971 %} 7972 7973 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7974 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7975 match(Set dst (MulVI src1 src2)); 7976 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 7977 ins_encode %{ 7978 int vector_len = 2; 7979 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7980 %} 7981 ins_pipe( pipe_slow ); 7982 %} 7983 7984 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7985 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7986 match(Set dst (MulVI src (LoadVector mem))); 7987 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7988 ins_encode %{ 7989 int vector_len = 2; 7990 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7991 %} 7992 ins_pipe( pipe_slow ); 7993 %} 7994 7995 // Floats vector mul 7996 instruct vmul2F(vecD dst, vecD src) %{ 7997 predicate(n->as_Vector()->length() == 2); 7998 match(Set dst (MulVF dst src)); 7999 format %{ "mulps $dst,$src\t! mul packed2F" %} 8000 ins_encode %{ 8001 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8002 %} 8003 ins_pipe( pipe_slow ); 8004 %} 8005 8006 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 8007 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8008 match(Set dst (MulVF src1 src2)); 8009 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 8010 ins_encode %{ 8011 int vector_len = 0; 8012 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8013 %} 8014 ins_pipe( pipe_slow ); 8015 %} 8016 8017 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 8018 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8019 match(Set dst (MulVF src (LoadVector mem))); 8020 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 8021 ins_encode %{ 8022 int vector_len = 0; 8023 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8024 %} 8025 ins_pipe( pipe_slow ); 8026 %} 8027 8028 instruct vmul4F(vecX dst, vecX src) %{ 8029 predicate(n->as_Vector()->length() == 4); 8030 match(Set dst (MulVF dst src)); 8031 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 8032 ins_encode %{ 8033 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8034 %} 8035 ins_pipe( pipe_slow ); 8036 %} 8037 8038 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 8039 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8040 match(Set dst (MulVF src1 src2)); 8041 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 8042 ins_encode %{ 8043 int vector_len = 0; 8044 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8045 %} 8046 ins_pipe( pipe_slow ); 8047 %} 8048 8049 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 8050 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8051 match(Set dst (MulVF src (LoadVector mem))); 8052 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 8053 ins_encode %{ 8054 int vector_len = 0; 8055 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8056 %} 8057 ins_pipe( pipe_slow ); 8058 %} 8059 8060 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 8061 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8062 match(Set dst (MulVF src1 src2)); 8063 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 8064 ins_encode %{ 8065 int vector_len = 1; 8066 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8067 %} 8068 ins_pipe( pipe_slow ); 8069 %} 8070 8071 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 8072 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8073 match(Set dst (MulVF src (LoadVector mem))); 8074 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 8075 ins_encode %{ 8076 int vector_len = 1; 8077 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8078 %} 8079 ins_pipe( pipe_slow ); 8080 %} 8081 8082 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8083 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8084 match(Set dst (MulVF src1 src2)); 8085 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 8086 ins_encode %{ 8087 int vector_len = 2; 8088 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8089 %} 8090 ins_pipe( pipe_slow ); 8091 %} 8092 8093 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 8094 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8095 match(Set dst (MulVF src (LoadVector mem))); 8096 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 8097 ins_encode %{ 8098 int vector_len = 2; 8099 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8100 %} 8101 ins_pipe( pipe_slow ); 8102 %} 8103 8104 // Doubles vector mul 8105 instruct vmul2D(vecX dst, vecX src) %{ 8106 predicate(n->as_Vector()->length() == 2); 8107 match(Set dst (MulVD dst src)); 8108 format %{ "mulpd $dst,$src\t! mul packed2D" %} 8109 ins_encode %{ 8110 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 8111 %} 8112 ins_pipe( pipe_slow ); 8113 %} 8114 8115 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 8116 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8117 match(Set dst (MulVD src1 src2)); 8118 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 8119 ins_encode %{ 8120 int vector_len = 0; 8121 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8122 %} 8123 ins_pipe( pipe_slow ); 8124 %} 8125 8126 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 8127 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8128 match(Set dst (MulVD src (LoadVector mem))); 8129 format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %} 8130 ins_encode %{ 8131 int vector_len = 0; 8132 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8133 %} 8134 ins_pipe( pipe_slow ); 8135 %} 8136 8137 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 8138 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8139 match(Set dst (MulVD src1 src2)); 8140 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 8141 ins_encode %{ 8142 int vector_len = 1; 8143 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8144 %} 8145 ins_pipe( pipe_slow ); 8146 %} 8147 8148 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 8149 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8150 match(Set dst (MulVD src (LoadVector mem))); 8151 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 8152 ins_encode %{ 8153 int vector_len = 1; 8154 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8155 %} 8156 ins_pipe( pipe_slow ); 8157 %} 8158 8159 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8160 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8161 match(Set dst (MulVD src1 src2)); 8162 format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} 8163 ins_encode %{ 8164 int vector_len = 2; 8165 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8166 %} 8167 ins_pipe( pipe_slow ); 8168 %} 8169 8170 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 8171 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8172 match(Set dst (MulVD src (LoadVector mem))); 8173 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 8174 ins_encode %{ 8175 int vector_len = 2; 8176 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8177 %} 8178 ins_pipe( pipe_slow ); 8179 %} 8180 8181 instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 8182 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8); 8183 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 8184 effect(TEMP dst, USE src1, USE src2); 8185 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 8186 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 8187 %} 8188 ins_encode %{ 8189 int vector_len = 1; 8190 int cond = (Assembler::Condition)($copnd$$cmpcode); 8191 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 8192 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 8193 %} 8194 ins_pipe( pipe_slow ); 8195 %} 8196 8197 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 8198 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4); 8199 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 8200 effect(TEMP dst, USE src1, USE src2); 8201 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 8202 "blendvpd $dst,$src1,$src2,$dst ! 
vcmovevd\n\t" 8203 %} 8204 ins_encode %{ 8205 int vector_len = 1; 8206 int cond = (Assembler::Condition)($copnd$$cmpcode); 8207 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 8208 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 8209 %} 8210 ins_pipe( pipe_slow ); 8211 %} 8212 8213 // --------------------------------- DIV -------------------------------------- 8214 8215 // Floats vector div 8216 instruct vdiv2F(vecD dst, vecD src) %{ 8217 predicate(n->as_Vector()->length() == 2); 8218 match(Set dst (DivVF dst src)); 8219 format %{ "divps $dst,$src\t! div packed2F" %} 8220 ins_encode %{ 8221 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8222 %} 8223 ins_pipe( pipe_slow ); 8224 %} 8225 8226 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 8227 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8228 match(Set dst (DivVF src1 src2)); 8229 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 8230 ins_encode %{ 8231 int vector_len = 0; 8232 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8233 %} 8234 ins_pipe( pipe_slow ); 8235 %} 8236 8237 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 8238 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8239 match(Set dst (DivVF src (LoadVector mem))); 8240 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 8241 ins_encode %{ 8242 int vector_len = 0; 8243 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8244 %} 8245 ins_pipe( pipe_slow ); 8246 %} 8247 8248 instruct vdiv4F(vecX dst, vecX src) %{ 8249 predicate(n->as_Vector()->length() == 4); 8250 match(Set dst (DivVF dst src)); 8251 format %{ "divps $dst,$src\t! div packed4F" %} 8252 ins_encode %{ 8253 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8254 %} 8255 ins_pipe( pipe_slow ); 8256 %} 8257 8258 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 8259 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8260 match(Set dst (DivVF src1 src2)); 8261 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 8262 ins_encode %{ 8263 int vector_len = 0; 8264 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8265 %} 8266 ins_pipe( pipe_slow ); 8267 %} 8268 8269 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 8270 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8271 match(Set dst (DivVF src (LoadVector mem))); 8272 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 8273 ins_encode %{ 8274 int vector_len = 0; 8275 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8276 %} 8277 ins_pipe( pipe_slow ); 8278 %} 8279 8280 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 8281 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8282 match(Set dst (DivVF src1 src2)); 8283 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 8284 ins_encode %{ 8285 int vector_len = 1; 8286 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8287 %} 8288 ins_pipe( pipe_slow ); 8289 %} 8290 8291 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 8292 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8293 match(Set dst (DivVF src (LoadVector mem))); 8294 format %{ "vdivps $dst,$src,$mem\t! 
div packed8F" %} 8295 ins_encode %{ 8296 int vector_len = 1; 8297 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8298 %} 8299 ins_pipe( pipe_slow ); 8300 %} 8301 8302 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8303 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8304 match(Set dst (DivVF src1 src2)); 8305 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 8306 ins_encode %{ 8307 int vector_len = 2; 8308 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8309 %} 8310 ins_pipe( pipe_slow ); 8311 %} 8312 8313 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 8314 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8315 match(Set dst (DivVF src (LoadVector mem))); 8316 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 8317 ins_encode %{ 8318 int vector_len = 2; 8319 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8320 %} 8321 ins_pipe( pipe_slow ); 8322 %} 8323 8324 // Doubles vector div 8325 instruct vdiv2D(vecX dst, vecX src) %{ 8326 predicate(n->as_Vector()->length() == 2); 8327 match(Set dst (DivVD dst src)); 8328 format %{ "divpd $dst,$src\t! div packed2D" %} 8329 ins_encode %{ 8330 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 8331 %} 8332 ins_pipe( pipe_slow ); 8333 %} 8334 8335 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 8336 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8337 match(Set dst (DivVD src1 src2)); 8338 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 8339 ins_encode %{ 8340 int vector_len = 0; 8341 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8342 %} 8343 ins_pipe( pipe_slow ); 8344 %} 8345 8346 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 8347 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8348 match(Set dst (DivVD src (LoadVector mem))); 8349 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 8350 ins_encode %{ 8351 int vector_len = 0; 8352 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8353 %} 8354 ins_pipe( pipe_slow ); 8355 %} 8356 8357 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 8358 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8359 match(Set dst (DivVD src1 src2)); 8360 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 8361 ins_encode %{ 8362 int vector_len = 1; 8363 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8364 %} 8365 ins_pipe( pipe_slow ); 8366 %} 8367 8368 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8369 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8370 match(Set dst (DivVD src (LoadVector mem))); 8371 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8372 ins_encode %{ 8373 int vector_len = 1; 8374 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8375 %} 8376 ins_pipe( pipe_slow ); 8377 %} 8378 8379 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8380 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8381 match(Set dst (DivVD src1 src2)); 8382 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8383 ins_encode %{ 8384 int vector_len = 2; 8385 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8386 %} 8387 ins_pipe( pipe_slow ); 8388 %} 8389 8390 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8391 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8392 match(Set dst (DivVD src (LoadVector mem))); 8393 format %{ "vdivpd $dst,$src,$mem\t! 
div packed8D" %} 8394 ins_encode %{ 8395 int vector_len = 2; 8396 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8397 %} 8398 ins_pipe( pipe_slow ); 8399 %} 8400 8401 // ------------------------------ Shift --------------------------------------- 8402 8403 // Left and right shift count vectors are the same on x86 8404 // (only lowest bits of xmm reg are used for count). 8405 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8406 match(Set dst (LShiftCntV cnt)); 8407 match(Set dst (RShiftCntV cnt)); 8408 format %{ "movd $dst,$cnt\t! load shift count" %} 8409 ins_encode %{ 8410 __ movdl($dst$$XMMRegister, $cnt$$Register); 8411 %} 8412 ins_pipe( pipe_slow ); 8413 %} 8414 8415 // --------------------------------- Sqrt -------------------------------------- 8416 8417 // Floating point vector sqrt 8418 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8419 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8420 match(Set dst (SqrtVD src)); 8421 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 8422 ins_encode %{ 8423 int vector_len = 0; 8424 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8425 %} 8426 ins_pipe( pipe_slow ); 8427 %} 8428 8429 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8430 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8431 match(Set dst (SqrtVD (LoadVector mem))); 8432 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8433 ins_encode %{ 8434 int vector_len = 0; 8435 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8436 %} 8437 ins_pipe( pipe_slow ); 8438 %} 8439 8440 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8441 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8442 match(Set dst (SqrtVD src)); 8443 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 8444 ins_encode %{ 8445 int vector_len = 1; 8446 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8447 %} 8448 ins_pipe( pipe_slow ); 8449 %} 8450 8451 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8452 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8453 match(Set dst (SqrtVD (LoadVector mem))); 8454 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8455 ins_encode %{ 8456 int vector_len = 1; 8457 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8458 %} 8459 ins_pipe( pipe_slow ); 8460 %} 8461 8462 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8463 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8464 match(Set dst (SqrtVD src)); 8465 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 8466 ins_encode %{ 8467 int vector_len = 2; 8468 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8469 %} 8470 ins_pipe( pipe_slow ); 8471 %} 8472 8473 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8474 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8475 match(Set dst (SqrtVD (LoadVector mem))); 8476 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 8477 ins_encode %{ 8478 int vector_len = 2; 8479 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8480 %} 8481 ins_pipe( pipe_slow ); 8482 %} 8483 8484 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 8485 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8486 match(Set dst (SqrtVF src)); 8487 format %{ "vsqrtps $dst,$src\t! 
sqrt packed2F" %} 8488 ins_encode %{ 8489 int vector_len = 0; 8490 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8491 %} 8492 ins_pipe( pipe_slow ); 8493 %} 8494 8495 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 8496 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8497 match(Set dst (SqrtVF (LoadVector mem))); 8498 format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} 8499 ins_encode %{ 8500 int vector_len = 0; 8501 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8502 %} 8503 ins_pipe( pipe_slow ); 8504 %} 8505 8506 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 8507 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8508 match(Set dst (SqrtVF src)); 8509 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 8510 ins_encode %{ 8511 int vector_len = 0; 8512 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8513 %} 8514 ins_pipe( pipe_slow ); 8515 %} 8516 8517 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 8518 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8519 match(Set dst (SqrtVF (LoadVector mem))); 8520 format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} 8521 ins_encode %{ 8522 int vector_len = 0; 8523 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8524 %} 8525 ins_pipe( pipe_slow ); 8526 %} 8527 8528 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 8529 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8530 match(Set dst (SqrtVF src)); 8531 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 8532 ins_encode %{ 8533 int vector_len = 1; 8534 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8535 %} 8536 ins_pipe( pipe_slow ); 8537 %} 8538 8539 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 8540 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8541 match(Set dst (SqrtVF (LoadVector mem))); 8542 format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %} 8543 ins_encode %{ 8544 int vector_len = 1; 8545 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8546 %} 8547 ins_pipe( pipe_slow ); 8548 %} 8549 8550 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 8551 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8552 match(Set dst (SqrtVF src)); 8553 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8554 ins_encode %{ 8555 int vector_len = 2; 8556 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8557 %} 8558 ins_pipe( pipe_slow ); 8559 %} 8560 8561 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8562 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8563 match(Set dst (SqrtVF (LoadVector mem))); 8564 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8565 ins_encode %{ 8566 int vector_len = 2; 8567 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8568 %} 8569 ins_pipe( pipe_slow ); 8570 %} 8571 8572 // ------------------------------ LeftShift ----------------------------------- 8573 8574 // Shorts/Chars vector left shift 8575 instruct vsll2S(vecS dst, vecS shift) %{ 8576 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8577 match(Set dst (LShiftVS dst shift)); 8578 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8579 ins_encode %{ 8580 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8581 %} 8582 ins_pipe( pipe_slow ); 8583 %} 8584 8585 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8586 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8587 match(Set dst (LShiftVS dst shift)); 8588 format %{ "psllw $dst,$shift\t! 
left shift packed2S" %} 8589 ins_encode %{ 8590 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8591 %} 8592 ins_pipe( pipe_slow ); 8593 %} 8594 8595 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 8596 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8597 match(Set dst (LShiftVS src shift)); 8598 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8599 ins_encode %{ 8600 int vector_len = 0; 8601 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8602 %} 8603 ins_pipe( pipe_slow ); 8604 %} 8605 8606 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 8607 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8608 match(Set dst (LShiftVS src shift)); 8609 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8610 ins_encode %{ 8611 int vector_len = 0; 8612 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8613 %} 8614 ins_pipe( pipe_slow ); 8615 %} 8616 8617 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 8618 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8619 match(Set dst (LShiftVS dst shift)); 8620 effect(TEMP src); 8621 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8622 ins_encode %{ 8623 int vector_len = 0; 8624 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8625 %} 8626 ins_pipe( pipe_slow ); 8627 %} 8628 8629 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 8630 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8631 match(Set dst (LShiftVS src shift)); 8632 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8633 ins_encode %{ 8634 int vector_len = 0; 8635 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8636 %} 8637 ins_pipe( pipe_slow ); 8638 %} 8639 8640 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 8641 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8642 match(Set dst (LShiftVS src shift)); 8643 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8644 ins_encode %{ 8645 int vector_len = 0; 8646 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8647 %} 8648 ins_pipe( pipe_slow ); 8649 %} 8650 8651 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 8652 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8653 match(Set dst (LShiftVS dst shift)); 8654 effect(TEMP src); 8655 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8656 ins_encode %{ 8657 int vector_len = 0; 8658 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8659 %} 8660 ins_pipe( pipe_slow ); 8661 %} 8662 8663 instruct vsll4S(vecD dst, vecS shift) %{ 8664 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8665 match(Set dst (LShiftVS dst shift)); 8666 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8667 ins_encode %{ 8668 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8669 %} 8670 ins_pipe( pipe_slow ); 8671 %} 8672 8673 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8674 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8675 match(Set dst (LShiftVS dst shift)); 8676 format %{ "psllw $dst,$shift\t! 
left shift packed4S" %} 8677 ins_encode %{ 8678 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8679 %} 8680 ins_pipe( pipe_slow ); 8681 %} 8682 8683 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 8684 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8685 match(Set dst (LShiftVS src shift)); 8686 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8687 ins_encode %{ 8688 int vector_len = 0; 8689 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8690 %} 8691 ins_pipe( pipe_slow ); 8692 %} 8693 8694 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 8695 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 8696 match(Set dst (LShiftVS src shift)); 8697 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8698 ins_encode %{ 8699 int vector_len = 0; 8700 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8701 %} 8702 ins_pipe( pipe_slow ); 8703 %} 8704 8705 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 8706 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 8707 match(Set dst (LShiftVS dst shift)); 8708 effect(TEMP src); 8709 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8710 ins_encode %{ 8711 int vector_len = 0; 8712 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8713 %} 8714 ins_pipe( pipe_slow ); 8715 %} 8716 8717 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 8718 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8719 match(Set dst (LShiftVS src shift)); 8720 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8721 ins_encode %{ 8722 int vector_len = 0; 8723 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8724 %} 8725 ins_pipe( pipe_slow ); 8726 %} 8727 8728 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 8729 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 8730 match(Set dst (LShiftVS src shift)); 8731 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8732 ins_encode %{ 8733 int vector_len = 0; 8734 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8735 %} 8736 ins_pipe( pipe_slow ); 8737 %} 8738 8739 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 8740 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 8741 match(Set dst (LShiftVS dst shift)); 8742 effect(TEMP src); 8743 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8744 ins_encode %{ 8745 int vector_len = 0; 8746 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8747 %} 8748 ins_pipe( pipe_slow ); 8749 %} 8750 8751 instruct vsll8S(vecX dst, vecS shift) %{ 8752 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8753 match(Set dst (LShiftVS dst shift)); 8754 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8755 ins_encode %{ 8756 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8757 %} 8758 ins_pipe( pipe_slow ); 8759 %} 8760 8761 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 8762 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8763 match(Set dst (LShiftVS dst shift)); 8764 format %{ "psllw $dst,$shift\t! 
left shift packed8S" %} 8765 ins_encode %{ 8766 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8767 %} 8768 ins_pipe( pipe_slow ); 8769 %} 8770 8771 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 8772 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8773 match(Set dst (LShiftVS src shift)); 8774 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8775 ins_encode %{ 8776 int vector_len = 0; 8777 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8778 %} 8779 ins_pipe( pipe_slow ); 8780 %} 8781 8782 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 8783 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8784 match(Set dst (LShiftVS src shift)); 8785 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8786 ins_encode %{ 8787 int vector_len = 0; 8788 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8789 %} 8790 ins_pipe( pipe_slow ); 8791 %} 8792 8793 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 8794 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8795 match(Set dst (LShiftVS dst shift)); 8796 effect(TEMP src); 8797 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8798 ins_encode %{ 8799 int vector_len = 0; 8800 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8801 %} 8802 ins_pipe( pipe_slow ); 8803 %} 8804 8805 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 8806 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8807 match(Set dst (LShiftVS src shift)); 8808 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8809 ins_encode %{ 8810 int vector_len = 0; 8811 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8812 %} 8813 ins_pipe( pipe_slow ); 8814 %} 8815 8816 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 8817 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8818 match(Set dst (LShiftVS src shift)); 8819 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8820 ins_encode %{ 8821 int vector_len = 0; 8822 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8823 %} 8824 ins_pipe( pipe_slow ); 8825 %} 8826 8827 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 8828 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8829 match(Set dst (LShiftVS dst shift)); 8830 effect(TEMP src); 8831 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8832 ins_encode %{ 8833 int vector_len = 0; 8834 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8835 %} 8836 ins_pipe( pipe_slow ); 8837 %} 8838 8839 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 8840 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8841 match(Set dst (LShiftVS src shift)); 8842 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8843 ins_encode %{ 8844 int vector_len = 1; 8845 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8846 %} 8847 ins_pipe( pipe_slow ); 8848 %} 8849 8850 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 8851 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8852 match(Set dst (LShiftVS src shift)); 8853 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 8854 ins_encode %{ 8855 int vector_len = 1; 8856 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8857 %} 8858 ins_pipe( pipe_slow ); 8859 %} 8860 8861 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 8862 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8863 match(Set dst (LShiftVS dst shift)); 8864 effect(TEMP src); 8865 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8866 ins_encode %{ 8867 int vector_len = 1; 8868 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8869 %} 8870 ins_pipe( pipe_slow ); 8871 %} 8872 8873 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 8874 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8875 match(Set dst (LShiftVS src shift)); 8876 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8877 ins_encode %{ 8878 int vector_len = 1; 8879 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8880 %} 8881 ins_pipe( pipe_slow ); 8882 %} 8883 8884 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 8885 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8886 match(Set dst (LShiftVS src shift)); 8887 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8888 ins_encode %{ 8889 int vector_len = 1; 8890 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8891 %} 8892 ins_pipe( pipe_slow ); 8893 %} 8894 8895 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 8896 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8897 match(Set dst (LShiftVS dst shift)); 8898 effect(TEMP src); 8899 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8900 ins_encode %{ 8901 int vector_len = 1; 8902 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8903 %} 8904 ins_pipe( pipe_slow ); 8905 %} 8906 8907 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8908 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8909 match(Set dst (LShiftVS src shift)); 8910 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 8911 ins_encode %{ 8912 int vector_len = 2; 8913 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8914 %} 8915 ins_pipe( pipe_slow ); 8916 %} 8917 8918 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8919 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8920 match(Set dst (LShiftVS src shift)); 8921 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 8922 ins_encode %{ 8923 int vector_len = 2; 8924 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8925 %} 8926 ins_pipe( pipe_slow ); 8927 %} 8928 8929 // Integers vector left shift 8930 instruct vsll2I(vecD dst, vecS shift) %{ 8931 predicate(n->as_Vector()->length() == 2); 8932 match(Set dst (LShiftVI dst shift)); 8933 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8934 ins_encode %{ 8935 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8936 %} 8937 ins_pipe( pipe_slow ); 8938 %} 8939 8940 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 8941 predicate(n->as_Vector()->length() == 2); 8942 match(Set dst (LShiftVI dst shift)); 8943 format %{ "pslld $dst,$shift\t! 
left shift packed2I" %} 8944 ins_encode %{ 8945 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8946 %} 8947 ins_pipe( pipe_slow ); 8948 %} 8949 8950 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 8951 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8952 match(Set dst (LShiftVI src shift)); 8953 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8954 ins_encode %{ 8955 int vector_len = 0; 8956 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8957 %} 8958 ins_pipe( pipe_slow ); 8959 %} 8960 8961 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8962 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8963 match(Set dst (LShiftVI src shift)); 8964 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8965 ins_encode %{ 8966 int vector_len = 0; 8967 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8968 %} 8969 ins_pipe( pipe_slow ); 8970 %} 8971 8972 instruct vsll4I(vecX dst, vecS shift) %{ 8973 predicate(n->as_Vector()->length() == 4); 8974 match(Set dst (LShiftVI dst shift)); 8975 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8976 ins_encode %{ 8977 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8978 %} 8979 ins_pipe( pipe_slow ); 8980 %} 8981 8982 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 8983 predicate(n->as_Vector()->length() == 4); 8984 match(Set dst (LShiftVI dst shift)); 8985 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8986 ins_encode %{ 8987 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8988 %} 8989 ins_pipe( pipe_slow ); 8990 %} 8991 8992 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 8993 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8994 match(Set dst (LShiftVI src shift)); 8995 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 8996 ins_encode %{ 8997 int vector_len = 0; 8998 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8999 %} 9000 ins_pipe( pipe_slow ); 9001 %} 9002 9003 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9004 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9005 match(Set dst (LShiftVI src shift)); 9006 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9007 ins_encode %{ 9008 int vector_len = 0; 9009 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9010 %} 9011 ins_pipe( pipe_slow ); 9012 %} 9013 9014 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 9015 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9016 match(Set dst (LShiftVI src shift)); 9017 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9018 ins_encode %{ 9019 int vector_len = 1; 9020 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9021 %} 9022 ins_pipe( pipe_slow ); 9023 %} 9024 9025 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9026 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9027 match(Set dst (LShiftVI src shift)); 9028 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9029 ins_encode %{ 9030 int vector_len = 1; 9031 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9032 %} 9033 ins_pipe( pipe_slow ); 9034 %} 9035 9036 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9037 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9038 match(Set dst (LShiftVI src shift)); 9039 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed16I" %} 9040 ins_encode %{ 9041 int vector_len = 2; 9042 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9043 %} 9044 ins_pipe( pipe_slow ); 9045 %} 9046 9047 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9048 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9049 match(Set dst (LShiftVI src shift)); 9050 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 9051 ins_encode %{ 9052 int vector_len = 2; 9053 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9054 %} 9055 ins_pipe( pipe_slow ); 9056 %} 9057 9058 // Longs vector left shift 9059 instruct vsll2L(vecX dst, vecS shift) %{ 9060 predicate(n->as_Vector()->length() == 2); 9061 match(Set dst (LShiftVL dst shift)); 9062 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9063 ins_encode %{ 9064 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 9065 %} 9066 ins_pipe( pipe_slow ); 9067 %} 9068 9069 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 9070 predicate(n->as_Vector()->length() == 2); 9071 match(Set dst (LShiftVL dst shift)); 9072 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9073 ins_encode %{ 9074 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 9075 %} 9076 ins_pipe( pipe_slow ); 9077 %} 9078 9079 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 9080 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9081 match(Set dst (LShiftVL src shift)); 9082 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9083 ins_encode %{ 9084 int vector_len = 0; 9085 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9086 %} 9087 ins_pipe( pipe_slow ); 9088 %} 9089 9090 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9091 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9092 match(Set dst (LShiftVL src shift)); 9093 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9094 ins_encode %{ 9095 int vector_len = 0; 9096 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9097 %} 9098 ins_pipe( pipe_slow ); 9099 %} 9100 9101 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 9102 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9103 match(Set dst (LShiftVL src shift)); 9104 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9105 ins_encode %{ 9106 int vector_len = 1; 9107 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9108 %} 9109 ins_pipe( pipe_slow ); 9110 %} 9111 9112 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9113 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9114 match(Set dst (LShiftVL src shift)); 9115 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9116 ins_encode %{ 9117 int vector_len = 1; 9118 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9119 %} 9120 ins_pipe( pipe_slow ); 9121 %} 9122 9123 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9124 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9125 match(Set dst (LShiftVL src shift)); 9126 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before the shift. Char vectors are fine, since chars are
// unsigned values. (A scalar sketch of this pitfall appears below, after the
// packed2S register-shift variants.)

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// "Special" variant for EVEX targets without AVX512BW: matched as a
// two-address operation (dst is also the input), with src as a temp.
instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
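// A minimal scalar sketch of the pitfall noted above (plain C++ mirroring
// Java's promotion rules; illustrative only, not HotSpot code). For a
// negative short, Java's ">>>" operates on the sign-extended int, while a
// 16-bit psrlw lane shifts zeroes into bit 15 directly, so the two disagree
// once the result is narrowed back to 16 bits:
//
//   #include <cstdint>
//   #include <cassert>
//   int main() {
//     int16_t s = -4;                                             // 0xFFFC
//     // Java: sign-extend to int, shift unsigned, truncate back to short.
//     int16_t java_sem = (int16_t)(((uint32_t)(int32_t)s) >> 1);  // 0xFFFE
//     // psrlw: logical shift performed directly on the 16-bit lane.
//     int16_t lane_sem = (int16_t)((uint16_t)s >> 1);             // 0x7FFE
//     assert(java_sem != lane_sem);   // shorts: the results differ
//     uint16_t c = 0xFFFC;            // chars zero-extend, so they agree:
//     assert((uint16_t)(((uint32_t)c) >> 1) == (uint16_t)(c >> 1));
//     return 0;
//   }

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t!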
logical right shift packed2S" %} 9210 ins_encode %{ 9211 int vector_len = 0; 9212 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9213 %} 9214 ins_pipe( pipe_slow ); 9215 %} 9216 9217 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9218 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9219 match(Set dst (URShiftVS src shift)); 9220 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9221 ins_encode %{ 9222 int vector_len = 0; 9223 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9224 %} 9225 ins_pipe( pipe_slow ); 9226 %} 9227 9228 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9229 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9230 match(Set dst (URShiftVS dst shift)); 9231 effect(TEMP src); 9232 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9233 ins_encode %{ 9234 int vector_len = 0; 9235 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9236 %} 9237 ins_pipe( pipe_slow ); 9238 %} 9239 9240 instruct vsrl4S(vecD dst, vecS shift) %{ 9241 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9242 match(Set dst (URShiftVS dst shift)); 9243 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9244 ins_encode %{ 9245 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9246 %} 9247 ins_pipe( pipe_slow ); 9248 %} 9249 9250 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 9251 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9252 match(Set dst (URShiftVS dst shift)); 9253 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9254 ins_encode %{ 9255 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9256 %} 9257 ins_pipe( pipe_slow ); 9258 %} 9259 9260 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9261 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9262 match(Set dst (URShiftVS src shift)); 9263 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9264 ins_encode %{ 9265 int vector_len = 0; 9266 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9267 %} 9268 ins_pipe( pipe_slow ); 9269 %} 9270 9271 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9272 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9273 match(Set dst (URShiftVS src shift)); 9274 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9275 ins_encode %{ 9276 int vector_len = 0; 9277 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9278 %} 9279 ins_pipe( pipe_slow ); 9280 %} 9281 9282 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9283 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9284 match(Set dst (URShiftVS dst shift)); 9285 effect(TEMP src); 9286 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9287 ins_encode %{ 9288 int vector_len = 0; 9289 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9290 %} 9291 ins_pipe( pipe_slow ); 9292 %} 9293 9294 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9295 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9296 match(Set dst (URShiftVS src shift)); 9297 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} 9298 ins_encode %{ 9299 int vector_len = 0; 9300 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9301 %} 9302 ins_pipe( pipe_slow ); 9303 %} 9304 9305 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9306 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9307 match(Set dst (URShiftVS src shift)); 9308 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9309 ins_encode %{ 9310 int vector_len = 0; 9311 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9312 %} 9313 ins_pipe( pipe_slow ); 9314 %} 9315 9316 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9317 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9318 match(Set dst (URShiftVS dst shift)); 9319 effect(TEMP src); 9320 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9321 ins_encode %{ 9322 int vector_len = 0; 9323 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9324 %} 9325 ins_pipe( pipe_slow ); 9326 %} 9327 9328 instruct vsrl8S(vecX dst, vecS shift) %{ 9329 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9330 match(Set dst (URShiftVS dst shift)); 9331 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9332 ins_encode %{ 9333 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9334 %} 9335 ins_pipe( pipe_slow ); 9336 %} 9337 9338 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 9339 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9340 match(Set dst (URShiftVS dst shift)); 9341 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9342 ins_encode %{ 9343 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9344 %} 9345 ins_pipe( pipe_slow ); 9346 %} 9347 9348 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9349 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9350 match(Set dst (URShiftVS src shift)); 9351 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9352 ins_encode %{ 9353 int vector_len = 0; 9354 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9355 %} 9356 ins_pipe( pipe_slow ); 9357 %} 9358 9359 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9360 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9361 match(Set dst (URShiftVS src shift)); 9362 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9363 ins_encode %{ 9364 int vector_len = 0; 9365 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9366 %} 9367 ins_pipe( pipe_slow ); 9368 %} 9369 9370 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9371 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9372 match(Set dst (URShiftVS dst shift)); 9373 effect(TEMP src); 9374 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9375 ins_encode %{ 9376 int vector_len = 0; 9377 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9378 %} 9379 ins_pipe( pipe_slow ); 9380 %} 9381 9382 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9383 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9384 match(Set dst (URShiftVS src shift)); 9385 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 9386 ins_encode %{ 9387 int vector_len = 0; 9388 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9389 %} 9390 ins_pipe( pipe_slow ); 9391 %} 9392 9393 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9394 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9395 match(Set dst (URShiftVS src shift)); 9396 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9397 ins_encode %{ 9398 int vector_len = 0; 9399 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9400 %} 9401 ins_pipe( pipe_slow ); 9402 %} 9403 9404 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9405 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9406 match(Set dst (URShiftVS dst shift)); 9407 effect(TEMP src); 9408 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9409 ins_encode %{ 9410 int vector_len = 0; 9411 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9412 %} 9413 ins_pipe( pipe_slow ); 9414 %} 9415 9416 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9417 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9418 match(Set dst (URShiftVS src shift)); 9419 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9420 ins_encode %{ 9421 int vector_len = 1; 9422 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9423 %} 9424 ins_pipe( pipe_slow ); 9425 %} 9426 9427 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9428 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9429 match(Set dst (URShiftVS src shift)); 9430 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9431 ins_encode %{ 9432 int vector_len = 1; 9433 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9434 %} 9435 ins_pipe( pipe_slow ); 9436 %} 9437 9438 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9439 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9440 match(Set dst (URShiftVS dst shift)); 9441 effect(TEMP src); 9442 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9443 ins_encode %{ 9444 int vector_len = 1; 9445 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9446 %} 9447 ins_pipe( pipe_slow ); 9448 %} 9449 9450 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9451 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9452 match(Set dst (URShiftVS src shift)); 9453 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9454 ins_encode %{ 9455 int vector_len = 1; 9456 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9457 %} 9458 ins_pipe( pipe_slow ); 9459 %} 9460 9461 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9462 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9463 match(Set dst (URShiftVS src shift)); 9464 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 9465 ins_encode %{ 9466 int vector_len = 1; 9467 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9468 %} 9469 ins_pipe( pipe_slow ); 9470 %} 9471 9472 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9473 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9474 match(Set dst (URShiftVS dst shift)); 9475 effect(TEMP src); 9476 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9477 ins_encode %{ 9478 int vector_len = 1; 9479 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9480 %} 9481 ins_pipe( pipe_slow ); 9482 %} 9483 9484 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9485 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9486 match(Set dst (URShiftVS src shift)); 9487 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9488 ins_encode %{ 9489 int vector_len = 2; 9490 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9491 %} 9492 ins_pipe( pipe_slow ); 9493 %} 9494 9495 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9496 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9497 match(Set dst (URShiftVS src shift)); 9498 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9499 ins_encode %{ 9500 int vector_len = 2; 9501 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9502 %} 9503 ins_pipe( pipe_slow ); 9504 %} 9505 9506 // Integers vector logical right shift 9507 instruct vsrl2I(vecD dst, vecS shift) %{ 9508 predicate(n->as_Vector()->length() == 2); 9509 match(Set dst (URShiftVI dst shift)); 9510 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9511 ins_encode %{ 9512 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9513 %} 9514 ins_pipe( pipe_slow ); 9515 %} 9516 9517 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 9518 predicate(n->as_Vector()->length() == 2); 9519 match(Set dst (URShiftVI dst shift)); 9520 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9521 ins_encode %{ 9522 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9523 %} 9524 ins_pipe( pipe_slow ); 9525 %} 9526 9527 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 9528 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9529 match(Set dst (URShiftVI src shift)); 9530 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9531 ins_encode %{ 9532 int vector_len = 0; 9533 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9534 %} 9535 ins_pipe( pipe_slow ); 9536 %} 9537 9538 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9539 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9540 match(Set dst (URShiftVI src shift)); 9541 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9542 ins_encode %{ 9543 int vector_len = 0; 9544 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9545 %} 9546 ins_pipe( pipe_slow ); 9547 %} 9548 9549 instruct vsrl4I(vecX dst, vecS shift) %{ 9550 predicate(n->as_Vector()->length() == 4); 9551 match(Set dst (URShiftVI dst shift)); 9552 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 9553 ins_encode %{ 9554 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9555 %} 9556 ins_pipe( pipe_slow ); 9557 %} 9558 9559 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 9560 predicate(n->as_Vector()->length() == 4); 9561 match(Set dst (URShiftVI dst shift)); 9562 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9563 ins_encode %{ 9564 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9565 %} 9566 ins_pipe( pipe_slow ); 9567 %} 9568 9569 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 9570 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9571 match(Set dst (URShiftVI src shift)); 9572 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9573 ins_encode %{ 9574 int vector_len = 0; 9575 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9576 %} 9577 ins_pipe( pipe_slow ); 9578 %} 9579 9580 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9581 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9582 match(Set dst (URShiftVI src shift)); 9583 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9584 ins_encode %{ 9585 int vector_len = 0; 9586 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9587 %} 9588 ins_pipe( pipe_slow ); 9589 %} 9590 9591 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 9592 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9593 match(Set dst (URShiftVI src shift)); 9594 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9595 ins_encode %{ 9596 int vector_len = 1; 9597 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9598 %} 9599 ins_pipe( pipe_slow ); 9600 %} 9601 9602 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9603 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9604 match(Set dst (URShiftVI src shift)); 9605 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9606 ins_encode %{ 9607 int vector_len = 1; 9608 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9609 %} 9610 ins_pipe( pipe_slow ); 9611 %} 9612 9613 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9614 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9615 match(Set dst (URShiftVI src shift)); 9616 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9617 ins_encode %{ 9618 int vector_len = 2; 9619 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9620 %} 9621 ins_pipe( pipe_slow ); 9622 %} 9623 9624 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9625 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9626 match(Set dst (URShiftVI src shift)); 9627 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9628 ins_encode %{ 9629 int vector_len = 2; 9630 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9631 %} 9632 ins_pipe( pipe_slow ); 9633 %} 9634 9635 // Longs vector logical right shift 9636 instruct vsrl2L(vecX dst, vecS shift) %{ 9637 predicate(n->as_Vector()->length() == 2); 9638 match(Set dst (URShiftVL dst shift)); 9639 format %{ "psrlq $dst,$shift\t! 
logical right shift packed2L" %} 9640 ins_encode %{ 9641 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 9642 %} 9643 ins_pipe( pipe_slow ); 9644 %} 9645 9646 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 9647 predicate(n->as_Vector()->length() == 2); 9648 match(Set dst (URShiftVL dst shift)); 9649 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9650 ins_encode %{ 9651 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 9652 %} 9653 ins_pipe( pipe_slow ); 9654 %} 9655 9656 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 9657 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9658 match(Set dst (URShiftVL src shift)); 9659 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9660 ins_encode %{ 9661 int vector_len = 0; 9662 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9663 %} 9664 ins_pipe( pipe_slow ); 9665 %} 9666 9667 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9668 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9669 match(Set dst (URShiftVL src shift)); 9670 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9671 ins_encode %{ 9672 int vector_len = 0; 9673 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9674 %} 9675 ins_pipe( pipe_slow ); 9676 %} 9677 9678 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 9679 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9680 match(Set dst (URShiftVL src shift)); 9681 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9682 ins_encode %{ 9683 int vector_len = 1; 9684 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9685 %} 9686 ins_pipe( pipe_slow ); 9687 %} 9688 9689 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9690 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9691 match(Set dst (URShiftVL src shift)); 9692 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9693 ins_encode %{ 9694 int vector_len = 1; 9695 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9696 %} 9697 ins_pipe( pipe_slow ); 9698 %} 9699 9700 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9701 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9702 match(Set dst (URShiftVL src shift)); 9703 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9704 ins_encode %{ 9705 int vector_len = 2; 9706 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9707 %} 9708 ins_pipe( pipe_slow ); 9709 %} 9710 9711 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9712 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9713 match(Set dst (URShiftVL src shift)); 9714 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9715 ins_encode %{ 9716 int vector_len = 2; 9717 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9718 %} 9719 ins_pipe( pipe_slow ); 9720 %} 9721 9722 // ------------------- ArithmeticRightShift ----------------------------------- 9723 9724 // Shorts/Chars vector arithmetic right shift 9725 instruct vsra2S(vecS dst, vecS shift) %{ 9726 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9727 match(Set dst (RShiftVS dst shift)); 9728 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// "Special" variant for EVEX targets without AVX512BW: matched as a
// two-address operation (dst is also the input), with src as a temp.
instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t!
arithmetic right shift packed4S" %} 9817 ins_encode %{ 9818 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9819 %} 9820 ins_pipe( pipe_slow ); 9821 %} 9822 9823 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 9824 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9825 match(Set dst (RShiftVS dst shift)); 9826 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9827 ins_encode %{ 9828 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9829 %} 9830 ins_pipe( pipe_slow ); 9831 %} 9832 9833 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9834 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9835 match(Set dst (RShiftVS src shift)); 9836 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9837 ins_encode %{ 9838 int vector_len = 0; 9839 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9840 %} 9841 ins_pipe( pipe_slow ); 9842 %} 9843 9844 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9845 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9846 match(Set dst (RShiftVS src shift)); 9847 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9848 ins_encode %{ 9849 int vector_len = 0; 9850 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9851 %} 9852 ins_pipe( pipe_slow ); 9853 %} 9854 9855 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9856 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9857 match(Set dst (RShiftVS dst shift)); 9858 effect(TEMP src); 9859 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9860 ins_encode %{ 9861 int vector_len = 0; 9862 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9863 %} 9864 ins_pipe( pipe_slow ); 9865 %} 9866 9867 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9868 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9869 match(Set dst (RShiftVS src shift)); 9870 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9871 ins_encode %{ 9872 int vector_len = 0; 9873 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9874 %} 9875 ins_pipe( pipe_slow ); 9876 %} 9877 9878 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9879 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9880 match(Set dst (RShiftVS src shift)); 9881 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9882 ins_encode %{ 9883 int vector_len = 0; 9884 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9885 %} 9886 ins_pipe( pipe_slow ); 9887 %} 9888 9889 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9890 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9891 match(Set dst (RShiftVS dst shift)); 9892 effect(TEMP src); 9893 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9894 ins_encode %{ 9895 int vector_len = 0; 9896 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9897 %} 9898 ins_pipe( pipe_slow ); 9899 %} 9900 9901 instruct vsra8S(vecX dst, vecS shift) %{ 9902 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9903 match(Set dst (RShiftVS dst shift)); 9904 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %} 9905 ins_encode %{ 9906 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9907 %} 9908 ins_pipe( pipe_slow ); 9909 %} 9910 9911 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 9912 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9913 match(Set dst (RShiftVS dst shift)); 9914 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 9915 ins_encode %{ 9916 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9917 %} 9918 ins_pipe( pipe_slow ); 9919 %} 9920 9921 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9922 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9923 match(Set dst (RShiftVS src shift)); 9924 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9925 ins_encode %{ 9926 int vector_len = 0; 9927 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9928 %} 9929 ins_pipe( pipe_slow ); 9930 %} 9931 9932 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9933 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9934 match(Set dst (RShiftVS src shift)); 9935 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9936 ins_encode %{ 9937 int vector_len = 0; 9938 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9939 %} 9940 ins_pipe( pipe_slow ); 9941 %} 9942 9943 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9944 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9945 match(Set dst (RShiftVS dst shift)); 9946 effect(TEMP src); 9947 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9948 ins_encode %{ 9949 int vector_len = 0; 9950 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9951 %} 9952 ins_pipe( pipe_slow ); 9953 %} 9954 9955 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9956 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9957 match(Set dst (RShiftVS src shift)); 9958 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9959 ins_encode %{ 9960 int vector_len = 0; 9961 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9962 %} 9963 ins_pipe( pipe_slow ); 9964 %} 9965 9966 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9967 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9968 match(Set dst (RShiftVS src shift)); 9969 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9970 ins_encode %{ 9971 int vector_len = 0; 9972 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9973 %} 9974 ins_pipe( pipe_slow ); 9975 %} 9976 9977 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9978 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9979 match(Set dst (RShiftVS dst shift)); 9980 effect(TEMP src); 9981 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9982 ins_encode %{ 9983 int vector_len = 0; 9984 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9985 %} 9986 ins_pipe( pipe_slow ); 9987 %} 9988 9989 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9990 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9991 match(Set dst (RShiftVS src shift)); 9992 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t!
arithmetic right shift packed32S" %} 10072 ins_encode %{ 10073 int vector_len = 2; 10074 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10075 %} 10076 ins_pipe( pipe_slow ); 10077 %} 10078 10079 // Integers vector arithmetic right shift 10080 instruct vsra2I(vecD dst, vecS shift) %{ 10081 predicate(n->as_Vector()->length() == 2); 10082 match(Set dst (RShiftVI dst shift)); 10083 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10084 ins_encode %{ 10085 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10086 %} 10087 ins_pipe( pipe_slow ); 10088 %} 10089 10090 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 10091 predicate(n->as_Vector()->length() == 2); 10092 match(Set dst (RShiftVI dst shift)); 10093 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10094 ins_encode %{ 10095 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10096 %} 10097 ins_pipe( pipe_slow ); 10098 %} 10099 10100 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 10101 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10102 match(Set dst (RShiftVI src shift)); 10103 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10104 ins_encode %{ 10105 int vector_len = 0; 10106 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10107 %} 10108 ins_pipe( pipe_slow ); 10109 %} 10110 10111 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 10112 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10113 match(Set dst (RShiftVI src shift)); 10114 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10115 ins_encode %{ 10116 int vector_len = 0; 10117 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10118 %} 10119 ins_pipe( pipe_slow ); 10120 %} 10121 10122 instruct vsra4I(vecX dst, vecS shift) %{ 10123 predicate(n->as_Vector()->length() == 4); 10124 match(Set dst (RShiftVI dst shift)); 10125 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10126 ins_encode %{ 10127 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10128 %} 10129 ins_pipe( pipe_slow ); 10130 %} 10131 10132 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 10133 predicate(n->as_Vector()->length() == 4); 10134 match(Set dst (RShiftVI dst shift)); 10135 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10136 ins_encode %{ 10137 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10138 %} 10139 ins_pipe( pipe_slow ); 10140 %} 10141 10142 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 10143 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10144 match(Set dst (RShiftVI src shift)); 10145 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10146 ins_encode %{ 10147 int vector_len = 0; 10148 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10149 %} 10150 ins_pipe( pipe_slow ); 10151 %} 10152 10153 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 10154 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10155 match(Set dst (RShiftVI src shift)); 10156 format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift instructions for longs.
// (A scalar sketch of one possible emulation appears after the 4-byte AND
// patterns below.)


// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
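// As referenced above: SSE/AVX2 have no 64-bit-lane arithmetic right shift
// (no psraq counterpart to psraw/psrad at this level), so none is matched
// here. A minimal scalar sketch of one way such a shift could be
// synthesized from a logical shift plus sign correction -- the classic
// ((x >>> n) ^ t) - t identity -- follows. This is illustrative C++ only,
// not something C2 emits; sra64 is a hypothetical helper, and n is assumed
// to be in [0, 63]:
//
//   #include <cstdint>
//   int64_t sra64(int64_t x, unsigned n) {
//     uint64_t t = 1ULL << (63 - n);   // position of the shifted-down sign bit
//     return (int64_t)((((uint64_t)x >> n) ^ t) - t);
//   }

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t!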
and vectors (8 bytes)" %} 10249 ins_encode %{ 10250 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10251 %} 10252 ins_pipe( pipe_slow ); 10253 %} 10254 10255 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 10256 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10257 match(Set dst (AndV src1 src2)); 10258 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 10259 ins_encode %{ 10260 int vector_len = 0; 10261 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10262 %} 10263 ins_pipe( pipe_slow ); 10264 %} 10265 10266 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 10267 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10268 match(Set dst (AndV src (LoadVector mem))); 10269 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 10270 ins_encode %{ 10271 int vector_len = 0; 10272 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10273 %} 10274 ins_pipe( pipe_slow ); 10275 %} 10276 10277 instruct vand16B(vecX dst, vecX src) %{ 10278 predicate(n->as_Vector()->length_in_bytes() == 16); 10279 match(Set dst (AndV dst src)); 10280 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 10281 ins_encode %{ 10282 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10283 %} 10284 ins_pipe( pipe_slow ); 10285 %} 10286 10287 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 10288 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10289 match(Set dst (AndV src1 src2)); 10290 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 10291 ins_encode %{ 10292 int vector_len = 0; 10293 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10294 %} 10295 ins_pipe( pipe_slow ); 10296 %} 10297 10298 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 10299 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10300 match(Set dst (AndV src (LoadVector mem))); 10301 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 10302 ins_encode %{ 10303 int vector_len = 0; 10304 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10305 %} 10306 ins_pipe( pipe_slow ); 10307 %} 10308 10309 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 10310 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10311 match(Set dst (AndV src1 src2)); 10312 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 10313 ins_encode %{ 10314 int vector_len = 1; 10315 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10316 %} 10317 ins_pipe( pipe_slow ); 10318 %} 10319 10320 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 10321 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10322 match(Set dst (AndV src (LoadVector mem))); 10323 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 10324 ins_encode %{ 10325 int vector_len = 1; 10326 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10327 %} 10328 ins_pipe( pipe_slow ); 10329 %} 10330 10331 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10332 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10333 match(Set dst (AndV src1 src2)); 10334 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t!
or vectors (16 bytes)" %} 10423 ins_encode %{ 10424 __ por($dst$$XMMRegister, $src$$XMMRegister); 10425 %} 10426 ins_pipe( pipe_slow ); 10427 %} 10428 10429 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10430 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10431 match(Set dst (OrV src1 src2)); 10432 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 10433 ins_encode %{ 10434 int vector_len = 0; 10435 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10436 %} 10437 ins_pipe( pipe_slow ); 10438 %} 10439 10440 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 10441 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10442 match(Set dst (OrV src (LoadVector mem))); 10443 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 10444 ins_encode %{ 10445 int vector_len = 0; 10446 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10447 %} 10448 ins_pipe( pipe_slow ); 10449 %} 10450 10451 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 10452 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10453 match(Set dst (OrV src1 src2)); 10454 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 10455 ins_encode %{ 10456 int vector_len = 1; 10457 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10458 %} 10459 ins_pipe( pipe_slow ); 10460 %} 10461 10462 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 10463 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10464 match(Set dst (OrV src (LoadVector mem))); 10465 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 10466 ins_encode %{ 10467 int vector_len = 1; 10468 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10469 %} 10470 ins_pipe( pipe_slow ); 10471 %} 10472 10473 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10474 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10475 match(Set dst (OrV src1 src2)); 10476 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 10477 ins_encode %{ 10478 int vector_len = 2; 10479 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10480 %} 10481 ins_pipe( pipe_slow ); 10482 %} 10483 10484 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 10485 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10486 match(Set dst (OrV src (LoadVector mem))); 10487 format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %} 10488 ins_encode %{ 10489 int vector_len = 2; 10490 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10491 %} 10492 ins_pipe( pipe_slow ); 10493 %} 10494 10495 // --------------------------------- XOR -------------------------------------- 10496 10497 instruct vxor4B(vecS dst, vecS src) %{ 10498 predicate(n->as_Vector()->length_in_bytes() == 4); 10499 match(Set dst (XorV dst src)); 10500 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 10501 ins_encode %{ 10502 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10503 %} 10504 ins_pipe( pipe_slow ); 10505 %} 10506 10507 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 10508 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10509 match(Set dst (XorV src1 src2)); 10510 format %{ "vpxor $dst,$src1,$src2\t! 
xor vectors (4 bytes)" %} 10511 ins_encode %{ 10512 int vector_len = 0; 10513 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10514 %} 10515 ins_pipe( pipe_slow ); 10516 %} 10517 10518 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 10519 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10520 match(Set dst (XorV src (LoadVector mem))); 10521 format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} 10522 ins_encode %{ 10523 int vector_len = 0; 10524 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10525 %} 10526 ins_pipe( pipe_slow ); 10527 %} 10528 10529 instruct vxor8B(vecD dst, vecD src) %{ 10530 predicate(n->as_Vector()->length_in_bytes() == 8); 10531 match(Set dst (XorV dst src)); 10532 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 10533 ins_encode %{ 10534 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10535 %} 10536 ins_pipe( pipe_slow ); 10537 %} 10538 10539 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 10540 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10541 match(Set dst (XorV src1 src2)); 10542 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 10543 ins_encode %{ 10544 int vector_len = 0; 10545 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10546 %} 10547 ins_pipe( pipe_slow ); 10548 %} 10549 10550 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 10551 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10552 match(Set dst (XorV src (LoadVector mem))); 10553 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 10554 ins_encode %{ 10555 int vector_len = 0; 10556 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10557 %} 10558 ins_pipe( pipe_slow ); 10559 %} 10560 10561 instruct vxor16B(vecX dst, vecX src) %{ 10562 predicate(n->as_Vector()->length_in_bytes() == 16); 10563 match(Set dst (XorV dst src)); 10564 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 10565 ins_encode %{ 10566 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10567 %} 10568 ins_pipe( pipe_slow ); 10569 %} 10570 10571 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10572 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10573 match(Set dst (XorV src1 src2)); 10574 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 10575 ins_encode %{ 10576 int vector_len = 0; 10577 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10578 %} 10579 ins_pipe( pipe_slow ); 10580 %} 10581 10582 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 10583 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10584 match(Set dst (XorV src (LoadVector mem))); 10585 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 10586 ins_encode %{ 10587 int vector_len = 0; 10588 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10589 %} 10590 ins_pipe( pipe_slow ); 10591 %} 10592 10593 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 10594 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10595 match(Set dst (XorV src1 src2)); 10596 format %{ "vpxor $dst,$src1,$src2\t! 
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
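//
// FmaVD/FmaVF compute c = a * b + c with a single rounding. Every rule is
// guarded by UseFMA, and the accumulator c is both an input and the result
// (match(Set c (FmaVD c (Binary a b)))), matching the destructive
// three-operand FMA register forms emitted by vfmad/vfmaf.
//
// As an illustration (a sketch; the method and array names are invented),
// a loop of this shape is the kind of code these rules are meant to cover
// once it is vectorized:
//
//   static void fmaLoop(double[] a, double[] b, double[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = Math.fma(a[i], b[i], c[i]);   // candidate for FmaVD
//     }
//   }
//
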
// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- PopCount --------------------------------------
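//
// PopCountVI counts the set bits in each 32-bit lane. Besides the
// UsePopCountInstruction flag, every rule requires hardware support for
// the AVX-512 vpopcntd instruction (VM_Version::supports_vpopcntdq()),
// since it is an AVX512_VPOPCNTDQ extension rather than a baseline op.
//
// As an illustration (a sketch; the method and array names are invented),
// a loop of this shape is the kind of code that can produce PopCountVI
// nodes:
//
//   static void bitCounts(int[] a, int[] r) {
//     for (int i = 0; i < r.length; i++) {
//       r[i] = Integer.bitCount(a[i]);   // candidate for vpopcntd
//     }
//   }
//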

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd  $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd  $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd  $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd  $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}