//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
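//
// How to read the definitions below (illustrative note, not extra registers):
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares the first 32-bit word of XMM0 as Save-On-Call in both the VM and
// C calling conventions, with ideal type Op_RegF and opcode encoding 0. The
// ->next(1) through ->next(15) slots (XMM0b through XMM0p) name the remaining
// words of the 512-bit register. The C-convention save type follows the ABI
// notes below: under _WIN64 the preserved registers XMM6-XMM31 are SOE, while
// elsewhere every XMM register is SOC.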
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and 1522 // definitions necessary in the rest of the architecture description 1523 1524 source_hpp %{ 1525 // Header information of the source block. 1526 // Method declarations/definitions which are used outside 1527 // the ad-scope can conveniently be defined here. 1528 // 1529 // To keep related declarations/definitions/uses close together, 1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1531 1532 class NativeJump; 1533 1534 class CallStubImpl { 1535 1536 //-------------------------------------------------------------- 1537 //---< Used for optimization in Compile::shorten_branches >--- 1538 //-------------------------------------------------------------- 1539 1540 public: 1541 // Size of call trampoline stub. 1542 static uint size_call_trampoline() { 1543 return 0; // no call trampolines on this platform 1544 } 1545 1546 // number of relocations needed by a call trampoline stub 1547 static uint reloc_call_trampoline() { 1548 return 0; // no call trampolines on this platform 1549 } 1550 }; 1551 1552 class HandlerImpl { 1553 1554 public: 1555 1556 static int emit_exception_handler(CodeBuffer &cbuf); 1557 static int emit_deopt_handler(CodeBuffer& cbuf); 1558 1559 static uint size_exception_handler() { 1560 // NativeCall instruction size is the same as NativeJump. 1561 // exception handler starts out as a jump and can be patched to 1562 // a call by deoptimization. (4932387) 1563 // Note that this value is also credited (in output.cpp) to 1564 // the size of the code section. 1565 return NativeJump::instruction_size; 1566 } 1567 1568 #ifdef _LP64 1569 static uint size_deopt_handler() { 1570 // three 5 byte instructions 1571 return 15; 1572 } 1573 #else 1574 static uint size_deopt_handler() { 1575 // NativeCall instruction size is the same as NativeJump. 1576 // exception handler starts out as a jump and can be patched to 1577 // a call by deoptimization. (4932387) 1578 // Note that this value is also credited (in output.cpp) to 1579 // the size of the code section. 1580 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1581 } 1582 #endif 1583 }; 1584 1585 %} // end source_hpp 1586 1587 source %{ 1588 1589 // Emit exception handler code. 1590 // Stuff framesize into a register and call a VM stub routine. 1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1592 1593 // Note that the code buffer's insts_mark is always relative to insts. 1594 // That's why we must use the macroassembler to generate a handler. 1595 MacroAssembler _masm(&cbuf); 1596 address base = __ start_a_stub(size_exception_handler()); 1597 if (base == NULL) { 1598 ciEnv::current()->record_failure("CodeCache is full"); 1599 return 0; // CodeBuffer::expand failed 1600 } 1601 int offset = __ offset(); 1602 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1603 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1604 __ end_a_stub(); 1605 return offset; 1606 } 1607 1608 // Emit deopt handler code. 1609 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1610 1611 // Note that the code buffer's insts_mark is always relative to insts. 1612 // That's why we must use the macroassembler to generate a handler.
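// The 64-bit path below captures the handler's own pc without clobbering
// any register (they may all be live): the call pushes the address of
// "next" onto the stack, and the subptr then subtracts the number of bytes
// emitted since "the_pc", leaving "the_pc" itself at [rsp] before the jump
// to the deopt blob.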
1613 MacroAssembler _masm(&cbuf); 1614 address base = __ start_a_stub(size_deopt_handler()); 1615 if (base == NULL) { 1616 ciEnv::current()->record_failure("CodeCache is full"); 1617 return 0; // CodeBuffer::expand failed 1618 } 1619 int offset = __ offset(); 1620 1621 #ifdef _LP64 1622 address the_pc = (address) __ pc(); 1623 Label next; 1624 // push a "the_pc" on the stack without destroying any registers 1625 // as they all may be live. 1626 1627 // push address of "next" 1628 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1629 __ bind(next); 1630 // adjust it so it matches "the_pc" 1631 __ subptr(Address(rsp, 0), __ offset() - offset); 1632 #else 1633 InternalAddress here(__ pc()); 1634 __ pushptr(here.addr()); 1635 #endif 1636 1637 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1638 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1639 __ end_a_stub(); 1640 return offset; 1641 } 1642 1643 1644 //============================================================================= 1645 1646 // Float masks come from different places depending on platform. 1647 #ifdef _LP64 1648 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1649 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1650 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1651 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1652 #else 1653 static address float_signmask() { return (address)float_signmask_pool; } 1654 static address float_signflip() { return (address)float_signflip_pool; } 1655 static address double_signmask() { return (address)double_signmask_pool; } 1656 static address double_signflip() { return (address)double_signflip_pool; } 1657 #endif 1658 1659 1660 const bool Matcher::match_rule_supported(int opcode) { 1661 if (!has_match_rule(opcode)) 1662 return false; 1663 1664 bool ret_value = true; 1665 switch (opcode) { 1666 case Op_PopCountI: 1667 case Op_PopCountL: 1668 if (!UsePopCountInstruction) 1669 ret_value = false; 1670 break; 1671 case Op_MulVI: 1672 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1673 ret_value = false; 1674 break; 1675 case Op_MulVL: 1676 case Op_MulReductionVL: 1677 if (VM_Version::supports_avx512dq() == false) 1678 ret_value = false; 1679 break; 1680 case Op_AddReductionVL: 1681 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1682 ret_value = false; 1683 break; 1684 case Op_AddReductionVI: 1685 if (UseSSE < 3) // requires at least SSE3 1686 ret_value = false; 1687 break; 1688 case Op_MulReductionVI: 1689 if (UseSSE < 4) // requires at least SSE4 1690 ret_value = false; 1691 break; 1692 case Op_AddReductionVF: 1693 case Op_AddReductionVD: 1694 case Op_MulReductionVF: 1695 case Op_MulReductionVD: 1696 if (UseSSE < 1) // requires at least SSE 1697 ret_value = false; 1698 break; 1699 case Op_SqrtVD: 1700 if (UseAVX < 1) // enabled for AVX only 1701 ret_value = false; 1702 break; 1703 case Op_CompareAndSwapL: 1704 #ifdef _LP64 1705 case Op_CompareAndSwapP: 1706 #endif 1707 if (!VM_Version::supports_cx8()) 1708 ret_value = false; 1709 break; 1710 case Op_CMoveVD: 1711 if (UseAVX < 1 || UseAVX > 2) 1712 ret_value = false; 1713 break; 1714 } 1715 1716 return ret_value; // By default, match rules are supported.
1717 } 1718 1719 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1720 if (!has_match_rule(opcode)) 1721 return false; 1722 1723 // identify extra cases that we might want to provide match rules for 1724 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen 1725 bool ret_value = match_rule_supported(opcode); 1726 if (ret_value) { 1727 switch (opcode) { 1728 case Op_AddVB: 1729 case Op_SubVB: 1730 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1731 ret_value = false; 1732 break; 1733 case Op_URShiftVS: 1734 case Op_RShiftVS: 1735 case Op_LShiftVS: 1736 case Op_MulVS: 1737 case Op_AddVS: 1738 case Op_SubVS: 1739 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1740 ret_value = false; 1741 break; 1742 case Op_CMoveVD: 1743 if (vlen != 4) 1744 ret_value = false; 1745 break; 1746 } 1747 } 1748 1749 return ret_value; // By default, match rules are supported. 1750 } 1751 1752 const int Matcher::float_pressure(int default_pressure_threshold) { 1753 int float_pressure_threshold = default_pressure_threshold; 1754 #ifdef _LP64 1755 if (UseAVX > 2) { 1756 // Increase pressure threshold on machines with AVX3 which have 1757 // 2x more XMM registers. 1758 float_pressure_threshold = default_pressure_threshold * 2; 1759 } 1760 #endif 1761 return float_pressure_threshold; 1762 } 1763 1764 // Max vector size in bytes. 0 if not supported. 1765 const int Matcher::vector_width_in_bytes(BasicType bt) { 1766 assert(is_java_primitive(bt), "only primitive type vectors"); 1767 if (UseSSE < 2) return 0; 1768 // SSE2 supports 128bit vectors for all types. 1769 // AVX2 supports 256bit vectors for all types. 1770 // EVEX (AVX512) supports 512bit vectors for all types. 1771 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1772 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1773 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1774 size = (UseAVX > 2) ? 64 : 32; 1775 // Use flag to limit vector size. 1776 size = MIN2(size,(int)MaxVectorSize); 1777 // Minimum 2 values in vector (or 4 for bytes). 1778 switch (bt) { 1779 case T_DOUBLE: 1780 case T_LONG: 1781 if (size < 16) return 0; 1782 break; 1783 case T_FLOAT: 1784 case T_INT: 1785 if (size < 8) return 0; 1786 break; 1787 case T_BOOLEAN: 1788 if (size < 4) return 0; 1789 break; 1790 case T_CHAR: 1791 if (size < 4) return 0; 1792 break; 1793 case T_BYTE: 1794 if (size < 4) return 0; 1795 break; 1796 case T_SHORT: 1797 if (size < 4) return 0; 1798 break; 1799 default: 1800 ShouldNotReachHere(); 1801 } 1802 return size; 1803 } 1804 1805 // Limits on vector size (number of elements) loaded into vector. 1806 const int Matcher::max_vector_size(const BasicType bt) { 1807 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1808 } 1809 const int Matcher::min_vector_size(const BasicType bt) { 1810 int max_size = max_vector_size(bt); 1811 // Min size which can be loaded into vector is 4 bytes. 1812 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1813 return MIN2(size,max_size); 1814 } 1815 1816 // Vector ideal reg corresponding to specified size in bytes 1817 const int Matcher::vector_ideal_reg(int size) { 1818 assert(MaxVectorSize >= size, ""); 1819 switch(size) { 1820 case 4: return Op_VecS; 1821 case 8: return Op_VecD; 1822 case 16: return Op_VecX; 1823 case 32: return Op_VecY; 1824 case 64: return Op_VecZ; 1825 } 1826 ShouldNotReachHere(); 1827 return 0; 1828 } 1829 1830 // Only lowest bits of xmm reg are used for vector shift count.
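// A shift count occupies only the low bits of its operand, so the 32-bit
// VecS slice is always wide enough, whatever the width of the vector being
// shifted.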
1831 const int Matcher::vector_shift_count_ideal_reg(int size) { 1832 return Op_VecS; 1833 } 1834 1835 // x86 supports misaligned vectors store/load. 1836 const bool Matcher::misaligned_vectors_ok() { 1837 return !AlignVector; // can be changed by flag 1838 } 1839 1840 // x86 AES instructions are compatible with SunJCE expanded 1841 // keys, hence we do not need to pass the original key to stubs 1842 const bool Matcher::pass_original_key_for_aes() { 1843 return false; 1844 } 1845 1846 // Helper methods for MachSpillCopyNode::implementation(). 1847 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1848 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1849 // In the 64-bit VM size calculation is very complex. Emitting instructions 1850 // into a scratch buffer is used to get the size in the 64-bit VM. 1851 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1852 assert(ireg == Op_VecS || // 32bit vector 1853 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1854 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1855 "no non-adjacent vector moves" ); 1856 if (cbuf) { 1857 MacroAssembler _masm(cbuf); 1858 int offset = __ offset(); 1859 switch (ireg) { 1860 case Op_VecS: // copy whole register 1861 case Op_VecD: 1862 case Op_VecX: 1863 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1864 break; 1865 case Op_VecY: 1866 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1867 break; 1868 case Op_VecZ: 1869 __ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1870 break; 1871 default: 1872 ShouldNotReachHere(); 1873 } 1874 int size = __ offset() - offset; 1875 #ifdef ASSERT 1876 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1877 assert(!do_size || size == 4, "incorrect size calculation"); 1878 #endif 1879 return size; 1880 #ifndef PRODUCT 1881 } else if (!do_size) { 1882 switch (ireg) { 1883 case Op_VecS: 1884 case Op_VecD: 1885 case Op_VecX: 1886 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1887 break; 1888 case Op_VecY: 1889 case Op_VecZ: 1890 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1891 break; 1892 default: 1893 ShouldNotReachHere(); 1894 } 1895 #endif 1896 } 1897 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1898 return (UseAVX > 2) ? 6 : 4; 1899 } 1900 1901 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1902 int stack_offset, int reg, uint ireg, outputStream* st) { 1903 // In the 64-bit VM size calculation is very complex. Emitting instructions 1904 // into a scratch buffer is used to get the size in the 64-bit VM.
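// Depending on ireg, the move below is emitted as movdl (VecS), movq
// (VecD), movdqu (VecX), vmovdqu (VecY) or evmovdqul (VecZ); with a NULL
// cbuf the instruction is only printed (or its size computed).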
1905 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1906 if (cbuf) { 1907 MacroAssembler _masm(cbuf); 1908 int offset = __ offset(); 1909 if (is_load) { 1910 switch (ireg) { 1911 case Op_VecS: 1912 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1913 break; 1914 case Op_VecD: 1915 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1916 break; 1917 case Op_VecX: 1918 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1919 break; 1920 case Op_VecY: 1921 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1922 break; 1923 case Op_VecZ: 1924 __ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1925 break; 1926 default: 1927 ShouldNotReachHere(); 1928 } 1929 } else { // store 1930 switch (ireg) { 1931 case Op_VecS: 1932 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1933 break; 1934 case Op_VecD: 1935 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1936 break; 1937 case Op_VecX: 1938 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1939 break; 1940 case Op_VecY: 1941 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1942 break; 1943 case Op_VecZ: 1944 __ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1945 break; 1946 default: 1947 ShouldNotReachHere(); 1948 } 1949 } 1950 int size = __ offset() - offset; 1951 #ifdef ASSERT 1952 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1953 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
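// The expected length is 2 prefix/escape bytes (66/F3 plus 0F with SSE, or
// the 2-byte VEX prefix) + opcode + ModRM + SIB for the rsp-based address,
// i.e. 5 bytes, plus the displacement bytes counted in offset_size.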
1954 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1955 #endif 1956 return size; 1957 #ifndef PRODUCT 1958 } else if (!do_size) { 1959 if (is_load) { 1960 switch (ireg) { 1961 case Op_VecS: 1962 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1963 break; 1964 case Op_VecD: 1965 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1966 break; 1967 case Op_VecX: 1968 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1969 break; 1970 case Op_VecY: 1971 case Op_VecZ: 1972 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1973 break; 1974 default: 1975 ShouldNotReachHere(); 1976 } 1977 } else { // store 1978 switch (ireg) { 1979 case Op_VecS: 1980 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1981 break; 1982 case Op_VecD: 1983 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1984 break; 1985 case Op_VecX: 1986 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1987 break; 1988 case Op_VecY: 1989 case Op_VecZ: 1990 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1991 break; 1992 default: 1993 ShouldNotReachHere(); 1994 } 1995 } 1996 #endif 1997 } 1998 bool is_single_byte = false; 1999 int vec_len = 0; 2000 if ((UseAVX > 2) && (stack_offset != 0)) { 2001 int tuple_type = Assembler::EVEX_FVM; 2002 int input_size = Assembler::EVEX_32bit; 2003 switch (ireg) { 2004 case Op_VecS: 2005 tuple_type = Assembler::EVEX_T1S; 2006 break; 2007 case Op_VecD: 2008 tuple_type = Assembler::EVEX_T1S; 2009 input_size = Assembler::EVEX_64bit; 2010 break; 2011 case Op_VecX: 2012 break; 2013 case Op_VecY: 2014 vec_len = 1; 2015 break; 2016 case Op_VecZ: 2017 vec_len = 2; 2018 break; 2019 } 2020 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2021 } 2022 int offset_size = 0; 2023 int size = 5; 2024 if (UseAVX > 2) { 2025 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2026 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2027 size += 2; // Need an additional two bytes for EVEX encoding 2028 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2029 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2030 } else { 2031 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2032 size += 2; // Need an additional two bytes for EVEX encoding 2033 } 2034 } else { 2035 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2036 } 2037 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2038 return size+offset_size; 2039 } 2040 2041 static inline jfloat replicate4_imm(int con, int width) { 2042 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2043 assert(width == 1 || width == 2, "only byte or short types here"); 2044 int bit_width = width * 8; 2045 jint val = con; 2046 val &= (1 << bit_width) - 1; // mask off sign bits 2047 while(bit_width < 32) { 2048 val |= (val << bit_width); 2049 bit_width <<= 1; 2050 } 2051 jfloat fval = *((jfloat*) &val); // coerce to float type 2052 return fval; 2053 } 2054 2055 static inline jdouble replicate8_imm(int con, int width) { 2056 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
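// For example, replicate8_imm(0xAB, 1) doubles the pattern width on each
// pass: 0xABAB, 0xABABABAB, 0xABABABABABABABAB, and returns those 64 bits
// reinterpreted as a jdouble; replicate4_imm above does the same for 32 bits.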
2057 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2058 int bit_width = width * 8; 2059 jlong val = con; 2060 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2061 while(bit_width < 64) { 2062 val |= (val << bit_width); 2063 bit_width <<= 1; 2064 } 2065 jdouble dval = *((jdouble*) &val); // coerce to double type 2066 return dval; 2067 } 2068 2069 #ifndef PRODUCT 2070 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2071 st->print("nop \t# %d bytes pad for loops and calls", _count); 2072 } 2073 #endif 2074 2075 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2076 MacroAssembler _masm(&cbuf); 2077 __ nop(_count); 2078 } 2079 2080 uint MachNopNode::size(PhaseRegAlloc*) const { 2081 return _count; 2082 } 2083 2084 #ifndef PRODUCT 2085 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2086 st->print("# breakpoint"); 2087 } 2088 #endif 2089 2090 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2091 MacroAssembler _masm(&cbuf); 2092 __ int3(); 2093 } 2094 2095 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2096 return MachNode::size(ra_); 2097 } 2098 2099 %} 2100 2101 encode %{ 2102 2103 enc_class call_epilog %{ 2104 if (VerifyStackAtCalls) { 2105 // Check that stack depth is unchanged: find majik cookie on stack 2106 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2107 MacroAssembler _masm(&cbuf); 2108 Label L; 2109 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2110 __ jccb(Assembler::equal, L); 2111 // Die if stack mismatch 2112 __ int3(); 2113 __ bind(L); 2114 } 2115 %} 2116 2117 %} 2118 2119 2120 //----------OPERANDS----------------------------------------------------------- 2121 // Operand definitions must precede instruction definitions for correct parsing 2122 // in the ADLC because operands constitute user defined types which are used in 2123 // instruction definitions. 2124 2125 // This one generically applies only for evex, so only one version 2126 operand vecZ() %{ 2127 constraint(ALLOC_IN_RC(vectorz_reg)); 2128 match(VecZ); 2129 2130 format %{ %} 2131 interface(REG_INTER); 2132 %} 2133 2134 // Comparison Code for FP conditional move 2135 operand cmpOp_vcmppd() %{ 2136 match(Bool); 2137 2138 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2139 n->as_Bool()->_test._test != BoolTest::no_overflow); 2140 format %{ "" %} 2141 interface(COND_INTER) %{ 2142 equal (0x0, "eq"); 2143 less (0x1, "lt"); 2144 less_equal (0x2, "le"); 2145 not_equal (0xC, "ne"); 2146 greater_equal(0xD, "ge"); 2147 greater (0xE, "gt"); 2148 //TODO cannot compile (adlc breaks) without the next two lines with error: 2149 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2150 // equal' for overflow.
2151 overflow (0x20, "o"); // not really supported by the instruction 2152 no_overflow (0x21, "no"); // not really supported by the instruction 2153 %} 2154 %} 2155 2156 2157 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2158 2159 // ============================================================================ 2160 2161 instruct ShouldNotReachHere() %{ 2162 match(Halt); 2163 format %{ "int3\t# ShouldNotReachHere" %} 2164 ins_encode %{ 2165 __ int3(); 2166 %} 2167 ins_pipe(pipe_slow); 2168 %} 2169 2170 // ============================================================================ 2171 2172 instruct addF_reg(regF dst, regF src) %{ 2173 predicate((UseSSE>=1) && (UseAVX == 0)); 2174 match(Set dst (AddF dst src)); 2175 2176 format %{ "addss $dst, $src" %} 2177 ins_cost(150); 2178 ins_encode %{ 2179 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2180 %} 2181 ins_pipe(pipe_slow); 2182 %} 2183 2184 instruct addF_mem(regF dst, memory src) %{ 2185 predicate((UseSSE>=1) && (UseAVX == 0)); 2186 match(Set dst (AddF dst (LoadF src))); 2187 2188 format %{ "addss $dst, $src" %} 2189 ins_cost(150); 2190 ins_encode %{ 2191 __ addss($dst$$XMMRegister, $src$$Address); 2192 %} 2193 ins_pipe(pipe_slow); 2194 %} 2195 2196 instruct addF_imm(regF dst, immF con) %{ 2197 predicate((UseSSE>=1) && (UseAVX == 0)); 2198 match(Set dst (AddF dst con)); 2199 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2200 ins_cost(150); 2201 ins_encode %{ 2202 __ addss($dst$$XMMRegister, $constantaddress($con)); 2203 %} 2204 ins_pipe(pipe_slow); 2205 %} 2206 2207 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2208 predicate(UseAVX > 0); 2209 match(Set dst (AddF src1 src2)); 2210 2211 format %{ "vaddss $dst, $src1, $src2" %} 2212 ins_cost(150); 2213 ins_encode %{ 2214 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2215 %} 2216 ins_pipe(pipe_slow); 2217 %} 2218 2219 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2220 predicate(UseAVX > 0); 2221 match(Set dst (AddF src1 (LoadF src2))); 2222 2223 format %{ "vaddss $dst, $src1, $src2" %} 2224 ins_cost(150); 2225 ins_encode %{ 2226 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2227 %} 2228 ins_pipe(pipe_slow); 2229 %} 2230 2231 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2232 predicate(UseAVX > 0); 2233 match(Set dst (AddF src con)); 2234 2235 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2236 ins_cost(150); 2237 ins_encode %{ 2238 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2239 %} 2240 ins_pipe(pipe_slow); 2241 %} 2242 2243 instruct addD_reg(regD dst, regD src) %{ 2244 predicate((UseSSE>=2) && (UseAVX == 0)); 2245 match(Set dst (AddD dst src)); 2246 2247 format %{ "addsd $dst, $src" %} 2248 ins_cost(150); 2249 ins_encode %{ 2250 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2251 %} 2252 ins_pipe(pipe_slow); 2253 %} 2254 2255 instruct addD_mem(regD dst, memory src) %{ 2256 predicate((UseSSE>=2) && (UseAVX == 0)); 2257 match(Set dst (AddD dst (LoadD src))); 2258 2259 format %{ "addsd $dst, $src" %} 2260 ins_cost(150); 2261 ins_encode %{ 2262 __ addsd($dst$$XMMRegister, $src$$Address); 2263 %} 2264 ins_pipe(pipe_slow); 2265 %} 2266 2267 instruct addD_imm(regD dst, immD con) %{ 2268 predicate((UseSSE>=2) && (UseAVX == 0)); 2269 match(Set dst (AddD dst con)); 2270 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2271 ins_cost(150); 
2272 ins_encode %{ 2273 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2274 %} 2275 ins_pipe(pipe_slow); 2276 %} 2277 2278 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2279 predicate(UseAVX > 0); 2280 match(Set dst (AddD src1 src2)); 2281 2282 format %{ "vaddsd $dst, $src1, $src2" %} 2283 ins_cost(150); 2284 ins_encode %{ 2285 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2286 %} 2287 ins_pipe(pipe_slow); 2288 %} 2289 2290 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2291 predicate(UseAVX > 0); 2292 match(Set dst (AddD src1 (LoadD src2))); 2293 2294 format %{ "vaddsd $dst, $src1, $src2" %} 2295 ins_cost(150); 2296 ins_encode %{ 2297 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2298 %} 2299 ins_pipe(pipe_slow); 2300 %} 2301 2302 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2303 predicate(UseAVX > 0); 2304 match(Set dst (AddD src con)); 2305 2306 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2307 ins_cost(150); 2308 ins_encode %{ 2309 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2310 %} 2311 ins_pipe(pipe_slow); 2312 %} 2313 2314 instruct subF_reg(regF dst, regF src) %{ 2315 predicate((UseSSE>=1) && (UseAVX == 0)); 2316 match(Set dst (SubF dst src)); 2317 2318 format %{ "subss $dst, $src" %} 2319 ins_cost(150); 2320 ins_encode %{ 2321 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2322 %} 2323 ins_pipe(pipe_slow); 2324 %} 2325 2326 instruct subF_mem(regF dst, memory src) %{ 2327 predicate((UseSSE>=1) && (UseAVX == 0)); 2328 match(Set dst (SubF dst (LoadF src))); 2329 2330 format %{ "subss $dst, $src" %} 2331 ins_cost(150); 2332 ins_encode %{ 2333 __ subss($dst$$XMMRegister, $src$$Address); 2334 %} 2335 ins_pipe(pipe_slow); 2336 %} 2337 2338 instruct subF_imm(regF dst, immF con) %{ 2339 predicate((UseSSE>=1) && (UseAVX == 0)); 2340 match(Set dst (SubF dst con)); 2341 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2342 ins_cost(150); 2343 ins_encode %{ 2344 __ subss($dst$$XMMRegister, $constantaddress($con)); 2345 %} 2346 ins_pipe(pipe_slow); 2347 %} 2348 2349 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2350 predicate(UseAVX > 0); 2351 match(Set dst (SubF src1 src2)); 2352 2353 format %{ "vsubss $dst, $src1, $src2" %} 2354 ins_cost(150); 2355 ins_encode %{ 2356 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2357 %} 2358 ins_pipe(pipe_slow); 2359 %} 2360 2361 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2362 predicate(UseAVX > 0); 2363 match(Set dst (SubF src1 (LoadF src2))); 2364 2365 format %{ "vsubss $dst, $src1, $src2" %} 2366 ins_cost(150); 2367 ins_encode %{ 2368 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2374 predicate(UseAVX > 0); 2375 match(Set dst (SubF src con)); 2376 2377 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2378 ins_cost(150); 2379 ins_encode %{ 2380 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2381 %} 2382 ins_pipe(pipe_slow); 2383 %} 2384 2385 instruct subD_reg(regD dst, regD src) %{ 2386 predicate((UseSSE>=2) && (UseAVX == 0)); 2387 match(Set dst (SubD dst src)); 2388 2389 format %{ "subsd $dst, $src" %} 2390 ins_cost(150); 2391 ins_encode %{ 2392 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2393 %} 2394 
ins_pipe(pipe_slow); 2395 %} 2396 2397 instruct subD_mem(regD dst, memory src) %{ 2398 predicate((UseSSE>=2) && (UseAVX == 0)); 2399 match(Set dst (SubD dst (LoadD src))); 2400 2401 format %{ "subsd $dst, $src" %} 2402 ins_cost(150); 2403 ins_encode %{ 2404 __ subsd($dst$$XMMRegister, $src$$Address); 2405 %} 2406 ins_pipe(pipe_slow); 2407 %} 2408 2409 instruct subD_imm(regD dst, immD con) %{ 2410 predicate((UseSSE>=2) && (UseAVX == 0)); 2411 match(Set dst (SubD dst con)); 2412 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2413 ins_cost(150); 2414 ins_encode %{ 2415 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2416 %} 2417 ins_pipe(pipe_slow); 2418 %} 2419 2420 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2421 predicate(UseAVX > 0); 2422 match(Set dst (SubD src1 src2)); 2423 2424 format %{ "vsubsd $dst, $src1, $src2" %} 2425 ins_cost(150); 2426 ins_encode %{ 2427 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2428 %} 2429 ins_pipe(pipe_slow); 2430 %} 2431 2432 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2433 predicate(UseAVX > 0); 2434 match(Set dst (SubD src1 (LoadD src2))); 2435 2436 format %{ "vsubsd $dst, $src1, $src2" %} 2437 ins_cost(150); 2438 ins_encode %{ 2439 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2440 %} 2441 ins_pipe(pipe_slow); 2442 %} 2443 2444 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2445 predicate(UseAVX > 0); 2446 match(Set dst (SubD src con)); 2447 2448 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2449 ins_cost(150); 2450 ins_encode %{ 2451 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2452 %} 2453 ins_pipe(pipe_slow); 2454 %} 2455 2456 instruct mulF_reg(regF dst, regF src) %{ 2457 predicate((UseSSE>=1) && (UseAVX == 0)); 2458 match(Set dst (MulF dst src)); 2459 2460 format %{ "mulss $dst, $src" %} 2461 ins_cost(150); 2462 ins_encode %{ 2463 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2464 %} 2465 ins_pipe(pipe_slow); 2466 %} 2467 2468 instruct mulF_mem(regF dst, memory src) %{ 2469 predicate((UseSSE>=1) && (UseAVX == 0)); 2470 match(Set dst (MulF dst (LoadF src))); 2471 2472 format %{ "mulss $dst, $src" %} 2473 ins_cost(150); 2474 ins_encode %{ 2475 __ mulss($dst$$XMMRegister, $src$$Address); 2476 %} 2477 ins_pipe(pipe_slow); 2478 %} 2479 2480 instruct mulF_imm(regF dst, immF con) %{ 2481 predicate((UseSSE>=1) && (UseAVX == 0)); 2482 match(Set dst (MulF dst con)); 2483 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2484 ins_cost(150); 2485 ins_encode %{ 2486 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2487 %} 2488 ins_pipe(pipe_slow); 2489 %} 2490 2491 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2492 predicate(UseAVX > 0); 2493 match(Set dst (MulF src1 src2)); 2494 2495 format %{ "vmulss $dst, $src1, $src2" %} 2496 ins_cost(150); 2497 ins_encode %{ 2498 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2499 %} 2500 ins_pipe(pipe_slow); 2501 %} 2502 2503 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2504 predicate(UseAVX > 0); 2505 match(Set dst (MulF src1 (LoadF src2))); 2506 2507 format %{ "vmulss $dst, $src1, $src2" %} 2508 ins_cost(150); 2509 ins_encode %{ 2510 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2511 %} 2512 ins_pipe(pipe_slow); 2513 %} 2514 2515 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2516 predicate(UseAVX > 0); 

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
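
// Abs and neg below are bitwise operations on the IEEE-754 sign bit: AND with
// 0x7fffffff (float) / 0x7fffffffffffffff (double) clears it, XOR with
// 0x80000000 / 0x8000000000000000 flips it. On AVX-512 targets without
// AVX512VL the 128-bit EVEX forms of vandps/vandpd are unavailable, hence the
// *_evex_special rules that go through the MacroAssembler helpers
// (vabsss/vabssd) instead.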

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avx256only());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avx256only());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif
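
// The vnegatess / vnegatesd helpers used below flip the IEEE-754 sign bit by
// XOR-ing with the signflip mask (0x80000000 / 0x8000000000000000).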
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2866 "# abs double by sign masking" %} 2867 ins_encode %{ 2868 int vector_len = 0; 2869 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2870 ExternalAddress(double_signmask()), vector_len); 2871 %} 2872 ins_pipe(pipe_slow); 2873 %} 2874 #endif 2875 2876 instruct negF_reg(regF dst) %{ 2877 predicate((UseSSE>=1) && (UseAVX == 0)); 2878 match(Set dst (NegF dst)); 2879 ins_cost(150); 2880 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2881 ins_encode %{ 2882 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2883 %} 2884 ins_pipe(pipe_slow); 2885 %} 2886 2887 instruct negF_reg_reg(regF dst, regF src) %{ 2888 predicate(UseAVX > 0); 2889 match(Set dst (NegF src)); 2890 ins_cost(150); 2891 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2892 ins_encode %{ 2893 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2894 ExternalAddress(float_signflip())); 2895 %} 2896 ins_pipe(pipe_slow); 2897 %} 2898 2899 instruct negD_reg(regD dst) %{ 2900 predicate((UseSSE>=2) && (UseAVX == 0)); 2901 match(Set dst (NegD dst)); 2902 ins_cost(150); 2903 format %{ "xorpd $dst, [0x8000000000000000]\t" 2904 "# neg double by sign flipping" %} 2905 ins_encode %{ 2906 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2907 %} 2908 ins_pipe(pipe_slow); 2909 %} 2910 2911 instruct negD_reg_reg(regD dst, regD src) %{ 2912 predicate(UseAVX > 0); 2913 match(Set dst (NegD src)); 2914 ins_cost(150); 2915 format %{ "vnegatess $dst, $src, [0x8000000000000000]\t" 2916 "# neg double by sign flipping" %} 2917 ins_encode %{ 2918 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2919 ExternalAddress(double_signflip())); 2920 %} 2921 ins_pipe(pipe_slow); 2922 %} 2923 2924 instruct sqrtF_reg(regF dst, regF src) %{ 2925 predicate(UseSSE>=1); 2926 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 2927 2928 format %{ "sqrtss $dst, $src" %} 2929 ins_cost(150); 2930 ins_encode %{ 2931 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2932 %} 2933 ins_pipe(pipe_slow); 2934 %} 2935 2936 instruct sqrtF_mem(regF dst, memory src) %{ 2937 predicate(UseSSE>=1); 2938 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 2939 2940 format %{ "sqrtss $dst, $src" %} 2941 ins_cost(150); 2942 ins_encode %{ 2943 __ sqrtss($dst$$XMMRegister, $src$$Address); 2944 %} 2945 ins_pipe(pipe_slow); 2946 %} 2947 2948 instruct sqrtF_imm(regF dst, immF con) %{ 2949 predicate(UseSSE>=1); 2950 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 2951 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2952 ins_cost(150); 2953 ins_encode %{ 2954 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2955 %} 2956 ins_pipe(pipe_slow); 2957 %} 2958 2959 instruct sqrtD_reg(regD dst, regD src) %{ 2960 predicate(UseSSE>=2); 2961 match(Set dst (SqrtD src)); 2962 2963 format %{ "sqrtsd $dst, $src" %} 2964 ins_cost(150); 2965 ins_encode %{ 2966 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2967 %} 2968 ins_pipe(pipe_slow); 2969 %} 2970 2971 instruct sqrtD_mem(regD dst, memory src) %{ 2972 predicate(UseSSE>=2); 2973 match(Set dst (SqrtD (LoadD src))); 2974 2975 format %{ "sqrtsd $dst, $src" %} 2976 ins_cost(150); 2977 ins_encode %{ 2978 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2979 %} 2980 ins_pipe(pipe_slow); 2981 %} 2982 2983 instruct sqrtD_imm(regD dst, immD con) %{ 2984 predicate(UseSSE>=2); 2985 match(Set dst (SqrtD con)); 2986 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: 

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE========================================
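
// The replicate rules come in three flavors: the LEGACY rules below, guarded by
// !supports_avx512vlbw() / !supports_avx512vl(), the GENERIC rules that apply
// on any SSE2 target, and the EVEX rules that use a single vpbroadcast
// instruction. The legacy idiom smears a scalar across the register step by
// step: movd puts it in the low lanes, punpcklbw/pshuflw/pshufd/punpcklqdq
// widen it to 128 bits, and vinserti128h/vinsertf128h copies the low 128 bits
// into the upper half.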

instruct Repl4B_mem(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
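
// The *_imm rules below build the broadcast constant at compile time:
// replicate4_imm / replicate8_imm repeat the low `width` bytes of the
// immediate until they fill 4 or 8 bytes, and the result is fetched from the
// constant table with movdl/movq before being widened in the register.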

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
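
// For 32-bit lanes a single pshufd with shuffle control 0x00 broadcasts
// element 0 to all four dword positions, so the int rules need fewer steps.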

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
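
// On 32-bit (!_LP64) targets a long lives in a register pair, so the non-LP64
// replicate rules assemble it in the XMM register from the low and high halves
// (movdl + punpckldq via HIGH_FROM_LOW) before broadcasting.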

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
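
// Float and double sources are already in XMM registers, so a single pshufd
// broadcast suffices: control 0x00 repeats dword 0, and control 0x44 repeats
// the low 64-bit (double) lane.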

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
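
// As with the float forms above, the *_zero rules clear the register by
// XOR-ing it with itself, which x86 recognizes as a dependency-breaking
// zeroing idiom.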

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE=======================================
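
// The generic rules below carry no AVX-512 predicate guards: they cover the
// small vector sizes that fit in the low 128 bits and apply on any
// SSE2-capable target, regardless of which of the families above is also
// available.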

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2 (plain AVX only has vxorps/vxorpd at 256 bits).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2 (plain AVX only has vxorps/vxorpd at 256 bits).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2 (plain AVX only has vxorps/vxorpd at 256 bits).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2 (plain AVX only has vxorps/vxorpd at 256 bits).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE==========================================
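
// With AVX-512 a replicate is a single vpbroadcastb/w/d from a general
// register or from memory. The byte/word forms need AVX512BW (plus AVX512VL
// below 512 bits), which is what the supports_avx512vlbw() /
// supports_avx512bw() guards check; vector_len again selects 128/256/512 bits.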

instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! upper replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // 512-bit vpxor requires EVEX (AVX-512).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // 512-bit vpxor needs the EVEX encoding, which UseAVX > 2 guarantees.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
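// Replicate integer (4 byte) scalar to be vector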
instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
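// The constant replicas below load an 8-byte pattern from the constant
// table: replicate8_imm(con, width) repeats the low 'width' bytes of the
// constant across 64 bits, and the broadcast then fills the whole vector.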
instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // 512-bit vpxor needs the EVEX encoding, which UseAVX > 2 guarantees.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
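// In 32-bit mode a long lives in a register pair: move the low and high
// halves into XMM registers separately and merge them with punpckldq
// before broadcasting the combined 64-bit lane.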
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // 512-bit vpxor needs the EVEX encoding, which UseAVX > 2 guarantees.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
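// Replicate float (4 byte) scalar to be vector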
instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: this is a 512-bit operation and
    // 512-bit vxorps requires AVX512DQ, while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: this is a 512-bit operation and
    // 512-bit vxorps requires AVX512DQ, while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: this is a 512-bit operation and
    // 512-bit vxorps requires AVX512DQ, while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: this is a 512-bit operation and
    // 512-bit vxorps requires AVX512DQ, while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
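// Replicate double (8 byte) scalar to be vector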
instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: this is a 512-bit operation and
    // 512-bit vxorpd requires AVX512DQ, while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: this is a 512-bit operation and
    // 512-bit vxorpd requires AVX512DQ, while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: this is a 512-bit operation and
    // 512-bit vxorpd requires AVX512DQ, while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================
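// A reduction folds vector src2 and scalar src1 into a single scalar
// result: the upper half of the vector is repeatedly shuffled or extracted
// down onto the lower half and combined, until a single element remains,
// which is then combined with src1 and moved to the destination register.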
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avx256only());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
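// pshufd immediates used by the reductions: 0x1 moves element 1 into
// slot 0; 0xE (0b1110) moves the upper 64 bits (elements 2 and 3) into
// the lower half.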
instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avx256only());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avx256only());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextracti128 $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128 $tmp,$src2\n\t"
            "vpaddd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128 $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction16I" %}
  ins_encode %{
    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128 $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128 $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
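// Float and double reductions combine the elements with scalar addss/addsd
// in a fixed left-to-right order, so the result matches a strictly ordered
// (non-reassociated) scalar loop.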
instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "addss $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
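// Multiply reductions fold the vector the same way as the add reductions,
// using pmulld/vpmulld (and, for longs, vpmullq) as the combining op.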
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pmulld $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pmulld $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128 $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128 $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
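// Long multiply reductions need vpmullq, an AVX512DQ instruction, hence
// the supports_avx512dq() predicates below.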
#ifdef _LP64
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmullq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpmullq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128 $tmp,$src2\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
            "vextracti128 $tmp,$tmp2\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"

instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
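
// The pshufd immediate 0xE used by the double reductions decodes as
// 0b00_00_11_10, selecting source dwords (2,3,0,0).  Dwords 2 and 3 are
// exactly the upper double of the 128-bit lane, so
//
//   pshufd $tmp,$src2,0xE   // $tmp[63:0] = $src2[127:64]
//
// brings the second double down to element 0 for mulsd/vmulsd.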

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
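// The byte and short add/sub rules below come in three flavors, keyed on
// CPU feature predicates:
//   supports_avx256only() - AVX/AVX2 with at most 256-bit vectors,
//   supports_avx512bw()   - EVEX with the BW extension, which provides
//                           byte/word operations on the full register set,
//   supports_avx512nobw() - AVX-512 without BW, where byte/word ops keep
//                           their legacy encoding; the *_special rules
//                           appear to work around this by matching the op
//                           in read-modify-write form, e.g.
//                           (Set dst (AddVB dst src2)), with a spare TEMP
//                           operand to constrain register allocation.
// In the ins_encode blocks, vector_len selects the encoded width:
// 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.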

instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
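
// The *_mem rules match the vector load together with the add, e.g.
// (Set dst (AddVB src (LoadVector mem))), so the load is folded into the
// instruction's memory operand: a single vpaddb $dst,$src,$mem instead of
// a separate load followed by a register-register add.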

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
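// The int/long/float/double rules need no BW-specific variants: dword and
// qword vector adds are part of the base AVX-512 feature set, so plain
// UseAVX level checks suffice (UseAVX > 0 for 128-bit, UseAVX > 1 for
// 256-bit integer ops, UseAVX > 2 for 512-bit).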
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
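
// Long vector adds mirror the int forms with paddq/vpaddq.  Note that the
// smallest long vector is 2L in a 128-bit register (vecX), so there are no
// vecS/vecD flavors in this subsection.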

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
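// Subtraction is not commutative, so operand order matters throughout this
// section: the two-address SSE forms (psubb $dst,$src computes dst -= src)
// require the minuend to already be in $dst, while the three-operand AVX
// forms name it explicitly as $src1.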

instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
sub packed2S" %} 7088 ins_encode %{ 7089 int vector_len = 0; 7090 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7091 %} 7092 ins_pipe( pipe_slow ); 7093 %} 7094 7095 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 7096 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7097 match(Set dst (SubVS dst src2)); 7098 effect(TEMP src1); 7099 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7100 ins_encode %{ 7101 int vector_len = 0; 7102 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7103 %} 7104 ins_pipe( pipe_slow ); 7105 %} 7106 7107 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7108 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); 7109 match(Set dst (SubVS src (LoadVector mem))); 7110 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7111 ins_encode %{ 7112 int vector_len = 0; 7113 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7114 %} 7115 ins_pipe( pipe_slow ); 7116 %} 7117 7118 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7119 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7120 match(Set dst (SubVS src (LoadVector mem))); 7121 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7122 ins_encode %{ 7123 int vector_len = 0; 7124 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7125 %} 7126 ins_pipe( pipe_slow ); 7127 %} 7128 7129 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7130 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7131 match(Set dst (SubVS dst (LoadVector mem))); 7132 effect(TEMP src); 7133 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7134 ins_encode %{ 7135 int vector_len = 0; 7136 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7137 %} 7138 ins_pipe( pipe_slow ); 7139 %} 7140 7141 instruct vsub4S(vecD dst, vecD src) %{ 7142 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7143 match(Set dst (SubVS dst src)); 7144 format %{ "psubw $dst,$src\t! sub packed4S" %} 7145 ins_encode %{ 7146 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7147 %} 7148 ins_pipe( pipe_slow ); 7149 %} 7150 7151 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7152 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); 7153 match(Set dst (SubVS src1 src2)); 7154 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7155 ins_encode %{ 7156 int vector_len = 0; 7157 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7158 %} 7159 ins_pipe( pipe_slow ); 7160 %} 7161 7162 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7163 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7164 match(Set dst (SubVS src1 src2)); 7165 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7166 ins_encode %{ 7167 int vector_len = 0; 7168 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7169 %} 7170 ins_pipe( pipe_slow ); 7171 %} 7172 7173 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7174 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7175 match(Set dst (SubVS dst src2)); 7176 effect(TEMP src1); 7177 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 7178 ins_encode %{ 7179 int vector_len = 0; 7180 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7181 %} 7182 ins_pipe( pipe_slow ); 7183 %} 7184 7185 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7186 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); 7187 match(Set dst (SubVS src (LoadVector mem))); 7188 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7189 ins_encode %{ 7190 int vector_len = 0; 7191 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7192 %} 7193 ins_pipe( pipe_slow ); 7194 %} 7195 7196 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7197 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7198 match(Set dst (SubVS src (LoadVector mem))); 7199 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7200 ins_encode %{ 7201 int vector_len = 0; 7202 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7203 %} 7204 ins_pipe( pipe_slow ); 7205 %} 7206 7207 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7208 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7209 match(Set dst (SubVS dst (LoadVector mem))); 7210 effect(TEMP src); 7211 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7212 ins_encode %{ 7213 int vector_len = 0; 7214 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7215 %} 7216 ins_pipe( pipe_slow ); 7217 %} 7218 7219 instruct vsub8S(vecX dst, vecX src) %{ 7220 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7221 match(Set dst (SubVS dst src)); 7222 format %{ "psubw $dst,$src\t! sub packed8S" %} 7223 ins_encode %{ 7224 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7225 %} 7226 ins_pipe( pipe_slow ); 7227 %} 7228 7229 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7230 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); 7231 match(Set dst (SubVS src1 src2)); 7232 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7233 ins_encode %{ 7234 int vector_len = 0; 7235 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7236 %} 7237 ins_pipe( pipe_slow ); 7238 %} 7239 7240 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7241 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7242 match(Set dst (SubVS src1 src2)); 7243 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7244 ins_encode %{ 7245 int vector_len = 0; 7246 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7247 %} 7248 ins_pipe( pipe_slow ); 7249 %} 7250 7251 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7252 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7253 match(Set dst (SubVS dst src2)); 7254 effect(TEMP src1); 7255 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7256 ins_encode %{ 7257 int vector_len = 0; 7258 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7259 %} 7260 ins_pipe( pipe_slow ); 7261 %} 7262 7263 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7264 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); 7265 match(Set dst (SubVS src (LoadVector mem))); 7266 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} 7267 ins_encode %{ 7268 int vector_len = 0; 7269 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7270 %} 7271 ins_pipe( pipe_slow ); 7272 %} 7273 7274 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7275 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7276 match(Set dst (SubVS src (LoadVector mem))); 7277 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7278 ins_encode %{ 7279 int vector_len = 0; 7280 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7281 %} 7282 ins_pipe( pipe_slow ); 7283 %} 7284 7285 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7286 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7287 match(Set dst (SubVS dst (LoadVector mem))); 7288 effect(TEMP src); 7289 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7290 ins_encode %{ 7291 int vector_len = 0; 7292 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7293 %} 7294 ins_pipe( pipe_slow ); 7295 %} 7296 7297 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7298 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7299 match(Set dst (SubVS src1 src2)); 7300 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7301 ins_encode %{ 7302 int vector_len = 1; 7303 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7304 %} 7305 ins_pipe( pipe_slow ); 7306 %} 7307 7308 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7309 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7310 match(Set dst (SubVS src1 src2)); 7311 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7312 ins_encode %{ 7313 int vector_len = 1; 7314 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7315 %} 7316 ins_pipe( pipe_slow ); 7317 %} 7318 7319 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7320 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7321 match(Set dst (SubVS dst src2)); 7322 effect(TEMP src1); 7323 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7324 ins_encode %{ 7325 int vector_len = 1; 7326 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7327 %} 7328 ins_pipe( pipe_slow ); 7329 %} 7330 7331 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7332 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7333 match(Set dst (SubVS src (LoadVector mem))); 7334 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7335 ins_encode %{ 7336 int vector_len = 1; 7337 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7338 %} 7339 ins_pipe( pipe_slow ); 7340 %} 7341 7342 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7343 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7344 match(Set dst (SubVS src (LoadVector mem))); 7345 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7346 ins_encode %{ 7347 int vector_len = 1; 7348 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7349 %} 7350 ins_pipe( pipe_slow ); 7351 %} 7352 7353 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7354 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7355 match(Set dst (SubVS dst (LoadVector mem))); 7356 effect(TEMP src); 7357 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 7358 ins_encode %{ 7359 int vector_len = 1; 7360 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7361 %} 7362 ins_pipe( pipe_slow ); 7363 %} 7364 7365 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7366 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7367 match(Set dst (SubVS src1 src2)); 7368 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 7369 ins_encode %{ 7370 int vector_len = 2; 7371 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7372 %} 7373 ins_pipe( pipe_slow ); 7374 %} 7375 7376 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 7377 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7378 match(Set dst (SubVS src (LoadVector mem))); 7379 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 7380 ins_encode %{ 7381 int vector_len = 2; 7382 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7383 %} 7384 ins_pipe( pipe_slow ); 7385 %} 7386 7387 // Integers vector sub 7388 instruct vsub2I(vecD dst, vecD src) %{ 7389 predicate(n->as_Vector()->length() == 2); 7390 match(Set dst (SubVI dst src)); 7391 format %{ "psubd $dst,$src\t! sub packed2I" %} 7392 ins_encode %{ 7393 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7394 %} 7395 ins_pipe( pipe_slow ); 7396 %} 7397 7398 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 7399 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7400 match(Set dst (SubVI src1 src2)); 7401 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 7402 ins_encode %{ 7403 int vector_len = 0; 7404 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7405 %} 7406 ins_pipe( pipe_slow ); 7407 %} 7408 7409 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 7410 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7411 match(Set dst (SubVI src (LoadVector mem))); 7412 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 7413 ins_encode %{ 7414 int vector_len = 0; 7415 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7416 %} 7417 ins_pipe( pipe_slow ); 7418 %} 7419 7420 instruct vsub4I(vecX dst, vecX src) %{ 7421 predicate(n->as_Vector()->length() == 4); 7422 match(Set dst (SubVI dst src)); 7423 format %{ "psubd $dst,$src\t! sub packed4I" %} 7424 ins_encode %{ 7425 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7426 %} 7427 ins_pipe( pipe_slow ); 7428 %} 7429 7430 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 7431 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7432 match(Set dst (SubVI src1 src2)); 7433 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 7434 ins_encode %{ 7435 int vector_len = 0; 7436 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7437 %} 7438 ins_pipe( pipe_slow ); 7439 %} 7440 7441 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 7442 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7443 match(Set dst (SubVI src (LoadVector mem))); 7444 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 7445 ins_encode %{ 7446 int vector_len = 0; 7447 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7448 %} 7449 ins_pipe( pipe_slow ); 7450 %} 7451 7452 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 7453 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7454 match(Set dst (SubVI src1 src2)); 7455 format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed8I" %} 7456 ins_encode %{ 7457 int vector_len = 1; 7458 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7459 %} 7460 ins_pipe( pipe_slow ); 7461 %} 7462 7463 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 7464 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7465 match(Set dst (SubVI src (LoadVector mem))); 7466 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 7467 ins_encode %{ 7468 int vector_len = 1; 7469 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7470 %} 7471 ins_pipe( pipe_slow ); 7472 %} 7473 7474 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7475 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7476 match(Set dst (SubVI src1 src2)); 7477 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 7478 ins_encode %{ 7479 int vector_len = 2; 7480 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7481 %} 7482 ins_pipe( pipe_slow ); 7483 %} 7484 7485 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 7486 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7487 match(Set dst (SubVI src (LoadVector mem))); 7488 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 7489 ins_encode %{ 7490 int vector_len = 2; 7491 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7492 %} 7493 ins_pipe( pipe_slow ); 7494 %} 7495 7496 // Longs vector sub 7497 instruct vsub2L(vecX dst, vecX src) %{ 7498 predicate(n->as_Vector()->length() == 2); 7499 match(Set dst (SubVL dst src)); 7500 format %{ "psubq $dst,$src\t! sub packed2L" %} 7501 ins_encode %{ 7502 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 7503 %} 7504 ins_pipe( pipe_slow ); 7505 %} 7506 7507 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 7508 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7509 match(Set dst (SubVL src1 src2)); 7510 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 7511 ins_encode %{ 7512 int vector_len = 0; 7513 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7514 %} 7515 ins_pipe( pipe_slow ); 7516 %} 7517 7518 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 7519 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7520 match(Set dst (SubVL src (LoadVector mem))); 7521 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 7522 ins_encode %{ 7523 int vector_len = 0; 7524 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7525 %} 7526 ins_pipe( pipe_slow ); 7527 %} 7528 7529 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 7530 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7531 match(Set dst (SubVL src1 src2)); 7532 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 7533 ins_encode %{ 7534 int vector_len = 1; 7535 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7536 %} 7537 ins_pipe( pipe_slow ); 7538 %} 7539 7540 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 7541 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7542 match(Set dst (SubVL src (LoadVector mem))); 7543 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 7544 ins_encode %{ 7545 int vector_len = 1; 7546 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7547 %} 7548 ins_pipe( pipe_slow ); 7549 %} 7550 7551 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7552 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7553 match(Set dst (SubVL src1 src2)); 7554 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed8L" %} 7555 ins_encode %{ 7556 int vector_len = 2; 7557 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7558 %} 7559 ins_pipe( pipe_slow ); 7560 %} 7561 7562 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 7563 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7564 match(Set dst (SubVL src (LoadVector mem))); 7565 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 7566 ins_encode %{ 7567 int vector_len = 2; 7568 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7569 %} 7570 ins_pipe( pipe_slow ); 7571 %} 7572 7573 // Floats vector sub 7574 instruct vsub2F(vecD dst, vecD src) %{ 7575 predicate(n->as_Vector()->length() == 2); 7576 match(Set dst (SubVF dst src)); 7577 format %{ "subps $dst,$src\t! sub packed2F" %} 7578 ins_encode %{ 7579 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7580 %} 7581 ins_pipe( pipe_slow ); 7582 %} 7583 7584 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 7585 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7586 match(Set dst (SubVF src1 src2)); 7587 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 7588 ins_encode %{ 7589 int vector_len = 0; 7590 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7591 %} 7592 ins_pipe( pipe_slow ); 7593 %} 7594 7595 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 7596 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7597 match(Set dst (SubVF src (LoadVector mem))); 7598 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 7599 ins_encode %{ 7600 int vector_len = 0; 7601 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7602 %} 7603 ins_pipe( pipe_slow ); 7604 %} 7605 7606 instruct vsub4F(vecX dst, vecX src) %{ 7607 predicate(n->as_Vector()->length() == 4); 7608 match(Set dst (SubVF dst src)); 7609 format %{ "subps $dst,$src\t! sub packed4F" %} 7610 ins_encode %{ 7611 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7612 %} 7613 ins_pipe( pipe_slow ); 7614 %} 7615 7616 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 7617 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7618 match(Set dst (SubVF src1 src2)); 7619 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 7620 ins_encode %{ 7621 int vector_len = 0; 7622 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7623 %} 7624 ins_pipe( pipe_slow ); 7625 %} 7626 7627 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7628 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7629 match(Set dst (SubVF src (LoadVector mem))); 7630 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7631 ins_encode %{ 7632 int vector_len = 0; 7633 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7634 %} 7635 ins_pipe( pipe_slow ); 7636 %} 7637 7638 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7639 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7640 match(Set dst (SubVF src1 src2)); 7641 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7642 ins_encode %{ 7643 int vector_len = 1; 7644 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7645 %} 7646 ins_pipe( pipe_slow ); 7647 %} 7648 7649 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7650 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7651 match(Set dst (SubVF src (LoadVector mem))); 7652 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %} 7653 ins_encode %{ 7654 int vector_len = 1; 7655 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7656 %} 7657 ins_pipe( pipe_slow ); 7658 %} 7659 7660 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7661 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7662 match(Set dst (SubVF src1 src2)); 7663 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 7664 ins_encode %{ 7665 int vector_len = 2; 7666 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7667 %} 7668 ins_pipe( pipe_slow ); 7669 %} 7670 7671 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 7672 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7673 match(Set dst (SubVF src (LoadVector mem))); 7674 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 7675 ins_encode %{ 7676 int vector_len = 2; 7677 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7678 %} 7679 ins_pipe( pipe_slow ); 7680 %} 7681 7682 // Doubles vector sub 7683 instruct vsub2D(vecX dst, vecX src) %{ 7684 predicate(n->as_Vector()->length() == 2); 7685 match(Set dst (SubVD dst src)); 7686 format %{ "subpd $dst,$src\t! sub packed2D" %} 7687 ins_encode %{ 7688 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 7689 %} 7690 ins_pipe( pipe_slow ); 7691 %} 7692 7693 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 7694 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7695 match(Set dst (SubVD src1 src2)); 7696 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 7697 ins_encode %{ 7698 int vector_len = 0; 7699 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7700 %} 7701 ins_pipe( pipe_slow ); 7702 %} 7703 7704 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 7705 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7706 match(Set dst (SubVD src (LoadVector mem))); 7707 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 7708 ins_encode %{ 7709 int vector_len = 0; 7710 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7711 %} 7712 ins_pipe( pipe_slow ); 7713 %} 7714 7715 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7716 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7717 match(Set dst (SubVD src1 src2)); 7718 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7719 ins_encode %{ 7720 int vector_len = 1; 7721 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7722 %} 7723 ins_pipe( pipe_slow ); 7724 %} 7725 7726 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7727 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7728 match(Set dst (SubVD src (LoadVector mem))); 7729 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7730 ins_encode %{ 7731 int vector_len = 1; 7732 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7733 %} 7734 ins_pipe( pipe_slow ); 7735 %} 7736 7737 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7738 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7739 match(Set dst (SubVD src1 src2)); 7740 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7741 ins_encode %{ 7742 int vector_len = 2; 7743 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7744 %} 7745 ins_pipe( pipe_slow ); 7746 %} 7747 7748 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7749 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7750 match(Set dst (SubVD src (LoadVector mem))); 7751 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------
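// Shorts/Chars vector mul
// pmullw/vpmullw keep only the low 16 bits of each 32-bit product, which
// matches Java's wrap-around short/char multiply. Per lane, roughly
// (scalar sketch, illustration only):
//
//   dst[i] = (jshort)((int)dst[i] * (int)src[i]);
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t!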
mul packed4S" %} 7844 ins_encode %{ 7845 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7846 %} 7847 ins_pipe( pipe_slow ); 7848 %} 7849 7850 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7851 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); 7852 match(Set dst (MulVS src1 src2)); 7853 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7854 ins_encode %{ 7855 int vector_len = 0; 7856 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7857 %} 7858 ins_pipe( pipe_slow ); 7859 %} 7860 7861 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7862 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7863 match(Set dst (MulVS src1 src2)); 7864 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7865 ins_encode %{ 7866 int vector_len = 0; 7867 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7868 %} 7869 ins_pipe( pipe_slow ); 7870 %} 7871 7872 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7873 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7874 match(Set dst (MulVS dst src2)); 7875 effect(TEMP src1); 7876 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7877 ins_encode %{ 7878 int vector_len = 0; 7879 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7880 %} 7881 ins_pipe( pipe_slow ); 7882 %} 7883 7884 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7885 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); 7886 match(Set dst (MulVS src (LoadVector mem))); 7887 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7888 ins_encode %{ 7889 int vector_len = 0; 7890 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7891 %} 7892 ins_pipe( pipe_slow ); 7893 %} 7894 7895 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7896 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7897 match(Set dst (MulVS src (LoadVector mem))); 7898 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7899 ins_encode %{ 7900 int vector_len = 0; 7901 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7902 %} 7903 ins_pipe( pipe_slow ); 7904 %} 7905 7906 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7907 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7908 match(Set dst (MulVS dst (LoadVector mem))); 7909 effect(TEMP src); 7910 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7911 ins_encode %{ 7912 int vector_len = 0; 7913 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7914 %} 7915 ins_pipe( pipe_slow ); 7916 %} 7917 7918 instruct vmul8S(vecX dst, vecX src) %{ 7919 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7920 match(Set dst (MulVS dst src)); 7921 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7922 ins_encode %{ 7923 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7924 %} 7925 ins_pipe( pipe_slow ); 7926 %} 7927 7928 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7929 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); 7930 match(Set dst (MulVS src1 src2)); 7931 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 7932 ins_encode %{ 7933 int vector_len = 0; 7934 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7935 %} 7936 ins_pipe( pipe_slow ); 7937 %} 7938 7939 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7940 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7941 match(Set dst (MulVS src1 src2)); 7942 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7943 ins_encode %{ 7944 int vector_len = 0; 7945 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7946 %} 7947 ins_pipe( pipe_slow ); 7948 %} 7949 7950 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7951 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7952 match(Set dst (MulVS dst src2)); 7953 effect(TEMP src1); 7954 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7955 ins_encode %{ 7956 int vector_len = 0; 7957 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7958 %} 7959 ins_pipe( pipe_slow ); 7960 %} 7961 7962 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7963 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); 7964 match(Set dst (MulVS src (LoadVector mem))); 7965 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7966 ins_encode %{ 7967 int vector_len = 0; 7968 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7969 %} 7970 ins_pipe( pipe_slow ); 7971 %} 7972 7973 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7974 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7975 match(Set dst (MulVS src (LoadVector mem))); 7976 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7977 ins_encode %{ 7978 int vector_len = 0; 7979 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7980 %} 7981 ins_pipe( pipe_slow ); 7982 %} 7983 7984 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7985 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7986 match(Set dst (MulVS dst (LoadVector mem))); 7987 effect(TEMP src); 7988 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7989 ins_encode %{ 7990 int vector_len = 0; 7991 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7992 %} 7993 ins_pipe( pipe_slow ); 7994 %} 7995 7996 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7997 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7998 match(Set dst (MulVS src1 src2)); 7999 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8000 ins_encode %{ 8001 int vector_len = 1; 8002 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8003 %} 8004 ins_pipe( pipe_slow ); 8005 %} 8006 8007 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 8008 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8009 match(Set dst (MulVS src1 src2)); 8010 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8011 ins_encode %{ 8012 int vector_len = 1; 8013 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8014 %} 8015 ins_pipe( pipe_slow ); 8016 %} 8017 8018 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 8019 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8020 match(Set dst (MulVS dst src2)); 8021 effect(TEMP src1); 8022 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
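// pmulld (packed 32-bit multiply, keeping the low dword of each product)
// only appeared with SSE4.1, hence the UseSSE > 3 guard below; the older
// SSE2-era pmullw rules above need no such check. Per lane, roughly
// (scalar sketch, illustration only):
//
//   dst[i] = dst[i] * src[i];  // low 32 bits of the 64-bit product
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t!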
mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
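// Long (64-bit) element multiplies use vpmullq, which exists only with the
// AVX-512DQ extension -- hence the supports_avx512dq() term in every MulVL
// predicate; no fallback rule is provided here for chips without DQ.
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t!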
mul packed8L" %} 8199 ins_encode %{ 8200 int vector_len = 2; 8201 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8202 %} 8203 ins_pipe( pipe_slow ); 8204 %} 8205 8206 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 8207 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8208 match(Set dst (MulVL src (LoadVector mem))); 8209 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 8210 ins_encode %{ 8211 int vector_len = 2; 8212 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8213 %} 8214 ins_pipe( pipe_slow ); 8215 %} 8216 8217 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 8218 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8219 match(Set dst (MulVI src1 src2)); 8220 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 8221 ins_encode %{ 8222 int vector_len = 1; 8223 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8224 %} 8225 ins_pipe( pipe_slow ); 8226 %} 8227 8228 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 8229 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8230 match(Set dst (MulVI src (LoadVector mem))); 8231 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 8232 ins_encode %{ 8233 int vector_len = 1; 8234 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8235 %} 8236 ins_pipe( pipe_slow ); 8237 %} 8238 8239 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8240 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8241 match(Set dst (MulVI src1 src2)); 8242 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 8243 ins_encode %{ 8244 int vector_len = 2; 8245 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8246 %} 8247 ins_pipe( pipe_slow ); 8248 %} 8249 8250 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 8251 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8252 match(Set dst (MulVI src (LoadVector mem))); 8253 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 8254 ins_encode %{ 8255 int vector_len = 2; 8256 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8257 %} 8258 ins_pipe( pipe_slow ); 8259 %} 8260 8261 // Floats vector mul 8262 instruct vmul2F(vecD dst, vecD src) %{ 8263 predicate(n->as_Vector()->length() == 2); 8264 match(Set dst (MulVF dst src)); 8265 format %{ "mulps $dst,$src\t! mul packed2F" %} 8266 ins_encode %{ 8267 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8268 %} 8269 ins_pipe( pipe_slow ); 8270 %} 8271 8272 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 8273 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8274 match(Set dst (MulVF src1 src2)); 8275 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 8276 ins_encode %{ 8277 int vector_len = 0; 8278 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8279 %} 8280 ins_pipe( pipe_slow ); 8281 %} 8282 8283 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 8284 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8285 match(Set dst (MulVF src (LoadVector mem))); 8286 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 8287 ins_encode %{ 8288 int vector_len = 0; 8289 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8290 %} 8291 ins_pipe( pipe_slow ); 8292 %} 8293 8294 instruct vmul4F(vecX dst, vecX src) %{ 8295 predicate(n->as_Vector()->length() == 4); 8296 match(Set dst (MulVF dst src)); 8297 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 8298 ins_encode %{ 8299 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8300 %} 8301 ins_pipe( pipe_slow ); 8302 %} 8303 8304 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 8305 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8306 match(Set dst (MulVF src1 src2)); 8307 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 8308 ins_encode %{ 8309 int vector_len = 0; 8310 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8311 %} 8312 ins_pipe( pipe_slow ); 8313 %} 8314 8315 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 8316 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8317 match(Set dst (MulVF src (LoadVector mem))); 8318 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 8319 ins_encode %{ 8320 int vector_len = 0; 8321 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8322 %} 8323 ins_pipe( pipe_slow ); 8324 %} 8325 8326 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 8327 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8328 match(Set dst (MulVF src1 src2)); 8329 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 8330 ins_encode %{ 8331 int vector_len = 1; 8332 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8333 %} 8334 ins_pipe( pipe_slow ); 8335 %} 8336 8337 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 8338 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8339 match(Set dst (MulVF src (LoadVector mem))); 8340 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 8341 ins_encode %{ 8342 int vector_len = 1; 8343 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8344 %} 8345 ins_pipe( pipe_slow ); 8346 %} 8347 8348 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8349 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8350 match(Set dst (MulVF src1 src2)); 8351 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 8352 ins_encode %{ 8353 int vector_len = 2; 8354 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8355 %} 8356 ins_pipe( pipe_slow ); 8357 %} 8358 8359 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 8360 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8361 match(Set dst (MulVF src (LoadVector mem))); 8362 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 8363 ins_encode %{ 8364 int vector_len = 2; 8365 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8366 %} 8367 ins_pipe( pipe_slow ); 8368 %} 8369 8370 // Doubles vector mul 8371 instruct vmul2D(vecX dst, vecX src) %{ 8372 predicate(n->as_Vector()->length() == 2); 8373 match(Set dst (MulVD dst src)); 8374 format %{ "mulpd $dst,$src\t! mul packed2D" %} 8375 ins_encode %{ 8376 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 8377 %} 8378 ins_pipe( pipe_slow ); 8379 %} 8380 8381 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 8382 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8383 match(Set dst (MulVD src1 src2)); 8384 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 8385 ins_encode %{ 8386 int vector_len = 0; 8387 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8388 %} 8389 ins_pipe( pipe_slow ); 8390 %} 8391 8392 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 8393 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8394 match(Set dst (MulVD src (LoadVector mem))); 8395 format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
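// The CMoveVD rule above builds a vector select: cmppd writes an
// all-ones/all-zeros mask into each 64-bit lane of dst, and vblendvpd then
// picks src2 where the mask is set and src1 where it is clear. In effect,
// per lane (scalar sketch, illustration only):
//
//   mask[i] = compare(src1[i], src2[i], cond) ? ~0L : 0L;
//   dst[i]  = mask[i] ? src2[i] : src1[i];

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t!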
div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t!
div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
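// The psll/psrl/psra forms that consume this count all read it from the
// low 64 bits of the xmm operand, so the single movdl above serves every
// element size and both shift directions; a count at or beyond the lane
// width zeroes the result (or fills with sign bits for the arithmetic
// right shifts).

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t!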
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
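
// Illustrative note (not a matcher rule): SqrtVD is generated when a loop
// applies Math.sqrt to a double array, e.g. (hypothetical name):
//
//   static void sqrtAll(double[] a) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = Math.sqrt(a[i]);   // vectorized to SqrtVD -> vsqrtpd
//     }
//   }
//
// As the comment above says, only the double-precision form is matched here.
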
// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
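
// Explanatory aid (describes the existing rules above and below, adds no
// behavior): the short/char shift rules come in three CPU-feature flavors:
//   _avx          - supports_avx256only(): AVX/AVX2 without EVEX; a true
//                   three-operand form, dst may differ from src.
//   _evex         - supports_avx512bw(): AVX-512 with the BW extension,
//                   which supplies EVEX-encoded word shifts.
//   _evex_special - supports_avx512nobw(): EVEX-capable CPU without BW;
//                   the rule matches the in-place form (dst, shift) and
//                   carries effect(TEMP src) instead of a three-operand
//                   form.
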
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int
// with sign extension before the shift. But char vectors are fine since
// chars are unsigned values.
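
// Illustrative note (not a matcher rule, hypothetical method names): why
// the URShiftVS rules below are reachable for chars but would give wrong
// answers for shorts:
//
//   static void srlShorts(short[] a) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = (short)(a[i] >>> 2);   // a[i] is sign-extended to int first,
//     }                               // so bits shift in from the extension,
//   }                                 // not from zero - psrlw would differ
//
//   static void srlChars(char[] a) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = (char)(a[i] >>> 2);    // chars are zero-extended, so a packed
//     }                               // 16-bit logical shift is equivalent
//   }
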
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
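
// Illustrative note (not a matcher rule, hypothetical method name): unlike
// the logical case above, arithmetic right shift of shorts is safe to
// vectorize, because shifting the sign-extended int and truncating back to
// short gives the same bits as a packed 16-bit arithmetic shift:
//
//   static void sraShorts(short[] a) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = (short)(a[i] >> 2);   // RShiftVS -> psraw/vpsraw
//     }
//   }
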
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

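// Note: the *_evex_special rules above cover EVEX-capable CPUs that lack
// AVX512BW (VM_Version::supports_avx512nobw()). Unlike the three-operand
// AVX/EVEX rules, they match the read-modify-write form
// (Set dst (RShiftVS dst shift)) and mark src only as a TEMP, so the
// register allocator keeps dst as both input and output of the shift.
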
// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

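// Note: in this section UseAVX > 0 gates the VEX-encoded 128-bit forms,
// UseAVX > 1 (AVX2) the 256-bit forms, and UseAVX > 2 (AVX-512) the
// 512-bit forms; the predicate-free rules are the two-operand SSE2 forms
// that shift dst in place.
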
instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift instructions for longs.
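// (SSE2 provides PSRAW/PSRAD but no PSRAQ, and AVX/AVX2 do not add one;
// a packed 64-bit arithmetic right shift only appears with AVX-512
// VPSRAQ.) Were one needed, it could be synthesized from the logical
// shift. A minimal sketch in SSE2 intrinsics -- illustration only, not
// part of this descriptor file; sra_epi64 is a hypothetical helper:
//
//   #include <emmintrin.h>
//   // Arithmetic shift right of two packed 64-bit lanes, 0 <= n <= 63:
//   // logical-shift, then sign-extend via (l ^ m) - m, where m marks the
//   // bit position the sign bit lands on after the shift.
//   static inline __m128i sra_epi64(__m128i x, int n) {
//     __m128i m = _mm_set1_epi64x((long long)(1ULL << (63 - n)));
//     __m128i l = _mm_srli_epi64(x, n);             // logical shift right
//     return _mm_sub_epi64(_mm_xor_si128(l, m), m); // restore sign bits
//   }
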
// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  // Fixed: the predicate tested length_in_bytes() == 4, but this rule
  // operates on 8-byte vecD vectors.
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
