//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 words of 32 bits each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
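// Editorial sketch (not part of the original file): how the lettered 32-bit
// word slices defined below are grouped by the register classes further down,
// assuming word (a) is the lowest 32 bits of the register:
//
//   XMM0               (word  a)   ->  32 bits : one Float
//   XMM0,  XMM0b       (words a-b) ->  64 bits : one Double
//   XMM0 .. XMM0d      (words a-d) -> 128-bit vector (vectorx_reg* classes)
//   XMM0 .. XMM0h      (words a-h) -> 256-bit vector (vectory_reg* classes)
//   XMM0 .. XMM0p      (words a-p) -> 512-bit vector (vectorz_reg* classes)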
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM31 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 632 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 633 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 634 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 635 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 636 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 637 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 638 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 639 #ifdef _LP64 640 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 641 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 642 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 643 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 644 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 645 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 646 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 647 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 648 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 649 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, 
XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 650 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 651 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 652 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 653 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 654 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 655 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 656 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 657 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 658 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 659 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 660 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 661 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 662 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 663 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 664 #endif 665 ); 666 667 // flags allocation class should be last. 
668 alloc_class chunk2(RFLAGS); 669 670 // Singleton class for condition codes 671 reg_class int_flags(RFLAGS); 672 673 // Class for pre evex float registers 674 reg_class float_reg_legacy(XMM0, 675 XMM1, 676 XMM2, 677 XMM3, 678 XMM4, 679 XMM5, 680 XMM6, 681 XMM7 682 #ifdef _LP64 683 ,XMM8, 684 XMM9, 685 XMM10, 686 XMM11, 687 XMM12, 688 XMM13, 689 XMM14, 690 XMM15 691 #endif 692 ); 693 694 // Class for evex float registers 695 reg_class float_reg_evex(XMM0, 696 XMM1, 697 XMM2, 698 XMM3, 699 XMM4, 700 XMM5, 701 XMM6, 702 XMM7 703 #ifdef _LP64 704 ,XMM8, 705 XMM9, 706 XMM10, 707 XMM11, 708 XMM12, 709 XMM13, 710 XMM14, 711 XMM15, 712 XMM16, 713 XMM17, 714 XMM18, 715 XMM19, 716 XMM20, 717 XMM21, 718 XMM22, 719 XMM23, 720 XMM24, 721 XMM25, 722 XMM26, 723 XMM27, 724 XMM28, 725 XMM29, 726 XMM30, 727 XMM31 728 #endif 729 ); 730 731 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 732 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 733 734 // Class for pre evex double registers 735 reg_class double_reg_legacy(XMM0, XMM0b, 736 XMM1, XMM1b, 737 XMM2, XMM2b, 738 XMM3, XMM3b, 739 XMM4, XMM4b, 740 XMM5, XMM5b, 741 XMM6, XMM6b, 742 XMM7, XMM7b 743 #ifdef _LP64 744 ,XMM8, XMM8b, 745 XMM9, XMM9b, 746 XMM10, XMM10b, 747 XMM11, XMM11b, 748 XMM12, XMM12b, 749 XMM13, XMM13b, 750 XMM14, XMM14b, 751 XMM15, XMM15b 752 #endif 753 ); 754 755 // Class for evex double registers 756 reg_class double_reg_evex(XMM0, XMM0b, 757 XMM1, XMM1b, 758 XMM2, XMM2b, 759 XMM3, XMM3b, 760 XMM4, XMM4b, 761 XMM5, XMM5b, 762 XMM6, XMM6b, 763 XMM7, XMM7b 764 #ifdef _LP64 765 ,XMM8, XMM8b, 766 XMM9, XMM9b, 767 XMM10, XMM10b, 768 XMM11, XMM11b, 769 XMM12, XMM12b, 770 XMM13, XMM13b, 771 XMM14, XMM14b, 772 XMM15, XMM15b, 773 XMM16, XMM16b, 774 XMM17, XMM17b, 775 XMM18, XMM18b, 776 XMM19, XMM19b, 777 XMM20, XMM20b, 778 XMM21, XMM21b, 779 XMM22, XMM22b, 780 XMM23, XMM23b, 781 XMM24, XMM24b, 782 XMM25, XMM25b, 783 XMM26, XMM26b, 784 XMM27, XMM27b, 785 XMM28, XMM28b, 786 XMM29, XMM29b, 787 XMM30, XMM30b, 788 XMM31, XMM31b 789 #endif 790 ); 791 792 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 793 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 794 795 // Class for pre evex 32bit vector registers 796 reg_class vectors_reg_legacy(XMM0, 797 XMM1, 798 XMM2, 799 XMM3, 800 XMM4, 801 XMM5, 802 XMM6, 803 XMM7 804 #ifdef _LP64 805 ,XMM8, 806 XMM9, 807 XMM10, 808 XMM11, 809 XMM12, 810 XMM13, 811 XMM14, 812 XMM15 813 #endif 814 ); 815 816 // Class for evex 32bit vector registers 817 reg_class vectors_reg_evex(XMM0, 818 XMM1, 819 XMM2, 820 XMM3, 821 XMM4, 822 XMM5, 823 XMM6, 824 XMM7 825 #ifdef _LP64 826 ,XMM8, 827 XMM9, 828 XMM10, 829 XMM11, 830 XMM12, 831 XMM13, 832 XMM14, 833 XMM15, 834 XMM16, 835 XMM17, 836 XMM18, 837 XMM19, 838 XMM20, 839 XMM21, 840 XMM22, 841 XMM23, 842 XMM24, 843 XMM25, 844 XMM26, 845 XMM27, 846 XMM28, 847 XMM29, 848 XMM30, 849 XMM31 850 #endif 851 ); 852 853 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 854 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 855 856 // Class for all 64bit vector registers 857 reg_class vectord_reg_legacy(XMM0, XMM0b, 858 XMM1, XMM1b, 859 XMM2, XMM2b, 860 XMM3, XMM3b, 861 XMM4, XMM4b, 862 
XMM5, XMM5b, 863 XMM6, XMM6b, 864 XMM7, XMM7b 865 #ifdef _LP64 866 ,XMM8, XMM8b, 867 XMM9, XMM9b, 868 XMM10, XMM10b, 869 XMM11, XMM11b, 870 XMM12, XMM12b, 871 XMM13, XMM13b, 872 XMM14, XMM14b, 873 XMM15, XMM15b 874 #endif 875 ); 876 877 // Class for all 64bit vector registers 878 reg_class vectord_reg_evex(XMM0, XMM0b, 879 XMM1, XMM1b, 880 XMM2, XMM2b, 881 XMM3, XMM3b, 882 XMM4, XMM4b, 883 XMM5, XMM5b, 884 XMM6, XMM6b, 885 XMM7, XMM7b 886 #ifdef _LP64 887 ,XMM8, XMM8b, 888 XMM9, XMM9b, 889 XMM10, XMM10b, 890 XMM11, XMM11b, 891 XMM12, XMM12b, 892 XMM13, XMM13b, 893 XMM14, XMM14b, 894 XMM15, XMM15b, 895 XMM16, XMM16b, 896 XMM17, XMM17b, 897 XMM18, XMM18b, 898 XMM19, XMM19b, 899 XMM20, XMM20b, 900 XMM21, XMM21b, 901 XMM22, XMM22b, 902 XMM23, XMM23b, 903 XMM24, XMM24b, 904 XMM25, XMM25b, 905 XMM26, XMM26b, 906 XMM27, XMM27b, 907 XMM28, XMM28b, 908 XMM29, XMM29b, 909 XMM30, XMM30b, 910 XMM31, XMM31b 911 #endif 912 ); 913 914 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 915 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 916 917 // Class for all 128bit vector registers 918 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 919 XMM1, XMM1b, XMM1c, XMM1d, 920 XMM2, XMM2b, XMM2c, XMM2d, 921 XMM3, XMM3b, XMM3c, XMM3d, 922 XMM4, XMM4b, XMM4c, XMM4d, 923 XMM5, XMM5b, XMM5c, XMM5d, 924 XMM6, XMM6b, XMM6c, XMM6d, 925 XMM7, XMM7b, XMM7c, XMM7d 926 #ifdef _LP64 927 ,XMM8, XMM8b, XMM8c, XMM8d, 928 XMM9, XMM9b, XMM9c, XMM9d, 929 XMM10, XMM10b, XMM10c, XMM10d, 930 XMM11, XMM11b, XMM11c, XMM11d, 931 XMM12, XMM12b, XMM12c, XMM12d, 932 XMM13, XMM13b, XMM13c, XMM13d, 933 XMM14, XMM14b, XMM14c, XMM14d, 934 XMM15, XMM15b, XMM15c, XMM15d 935 #endif 936 ); 937 938 // Class for all 128bit vector registers 939 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 940 XMM1, XMM1b, XMM1c, XMM1d, 941 XMM2, XMM2b, XMM2c, XMM2d, 942 XMM3, XMM3b, XMM3c, XMM3d, 943 XMM4, XMM4b, XMM4c, XMM4d, 944 XMM5, XMM5b, XMM5c, XMM5d, 945 XMM6, XMM6b, XMM6c, XMM6d, 946 XMM7, XMM7b, XMM7c, XMM7d 947 #ifdef _LP64 948 ,XMM8, XMM8b, XMM8c, XMM8d, 949 XMM9, XMM9b, XMM9c, XMM9d, 950 XMM10, XMM10b, XMM10c, XMM10d, 951 XMM11, XMM11b, XMM11c, XMM11d, 952 XMM12, XMM12b, XMM12c, XMM12d, 953 XMM13, XMM13b, XMM13c, XMM13d, 954 XMM14, XMM14b, XMM14c, XMM14d, 955 XMM15, XMM15b, XMM15c, XMM15d, 956 XMM16, XMM16b, XMM16c, XMM16d, 957 XMM17, XMM17b, XMM17c, XMM17d, 958 XMM18, XMM18b, XMM18c, XMM18d, 959 XMM19, XMM19b, XMM19c, XMM19d, 960 XMM20, XMM20b, XMM20c, XMM20d, 961 XMM21, XMM21b, XMM21c, XMM21d, 962 XMM22, XMM22b, XMM22c, XMM22d, 963 XMM23, XMM23b, XMM23c, XMM23d, 964 XMM24, XMM24b, XMM24c, XMM24d, 965 XMM25, XMM25b, XMM25c, XMM25d, 966 XMM26, XMM26b, XMM26c, XMM26d, 967 XMM27, XMM27b, XMM27c, XMM27d, 968 XMM28, XMM28b, XMM28c, XMM28d, 969 XMM29, XMM29b, XMM29c, XMM29d, 970 XMM30, XMM30b, XMM30c, XMM30d, 971 XMM31, XMM31b, XMM31c, XMM31d 972 #endif 973 ); 974 975 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 976 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 977 978 // Class for all 256bit vector registers 979 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 980 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 981 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 982 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 983 XMM4, XMM4b, 
XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 984 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 985 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 986 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 987 #ifdef _LP64 988 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 989 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 990 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 991 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 992 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 993 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 994 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 995 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 996 #endif 997 ); 998 999 // Class for all 256bit vector registers 1000 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1001 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1002 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1003 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1004 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1005 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1006 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1007 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1008 #ifdef _LP64 1009 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1010 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1011 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1012 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1013 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1014 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1015 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1016 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1017 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1018 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1019 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1020 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1021 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1022 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1023 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1024 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1025 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1026 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1027 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1028 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1029 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1030 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1031 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1032 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1033 #endif 1034 ); 1035 1036 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1037 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1038 1039 // Class for all 512bit vector registers 1040 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1041 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1042 XMM2, XMM2b, 
XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1043 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1044 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1045 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1046 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1047 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1048 #ifdef _LP64 1049 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1057 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, 
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 %} 1102 1103 1104 //----------SOURCE BLOCK------------------------------------------------------- 1105 // This is a block of C++ code which provides values, functions, and 1106 // definitions necessary in the rest of the architecture description 1107 1108 source_hpp %{ 1109 // Header information of the source block. 1110 // Method declarations/definitions which are used outside 1111 // the ad-scope can conveniently be defined here. 1112 // 1113 // To keep related declarations/definitions/uses close together, 1114 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
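// (Illustration of the convention above: the HandlerImpl class below declares
// emit_exception_handler() and emit_deopt_handler() in this source_hpp block so they are
// visible outside the ad-scope, while their bodies are provided in the source block
// further down in this file.)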
1115 1116 class NativeJump; 1117 1118 class CallStubImpl { 1119 1120 //-------------------------------------------------------------- 1121 //---< Used for optimization in Compile::shorten_branches >--- 1122 //-------------------------------------------------------------- 1123 1124 public: 1125 // Size of call trampoline stub. 1126 static uint size_call_trampoline() { 1127 return 0; // no call trampolines on this platform 1128 } 1129 1130 // number of relocations needed by a call trampoline stub 1131 static uint reloc_call_trampoline() { 1132 return 0; // no call trampolines on this platform 1133 } 1134 }; 1135 1136 class HandlerImpl { 1137 1138 public: 1139 1140 static int emit_exception_handler(CodeBuffer &cbuf); 1141 static int emit_deopt_handler(CodeBuffer& cbuf); 1142 1143 static uint size_exception_handler() { 1144 // NativeCall instruction size is the same as NativeJump. 1145 // The exception handler starts out as a jump and can be patched to 1146 // a call by deoptimization. (4932387) 1147 // Note that this value is also credited (in output.cpp) to 1148 // the size of the code section. 1149 return NativeJump::instruction_size; 1150 } 1151 1152 #ifdef _LP64 1153 static uint size_deopt_handler() { 1154 // three 5-byte instructions plus one move for an unreachable address. 1155 return 15+3; 1156 } 1157 #else 1158 static uint size_deopt_handler() { 1159 // NativeCall instruction size is the same as NativeJump. 1160 // The exception handler starts out as a jump and can be patched to 1161 // a call by deoptimization. (4932387) 1162 // Note that this value is also credited (in output.cpp) to 1163 // the size of the code section. 1164 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1165 } 1166 #endif 1167 }; 1168 1169 1170 inline uint vector_length(const Node* n) { 1171 const TypeVect* vt = n->bottom_type()->is_vect(); 1172 return vt->length(); 1173 } 1174 1175 inline uint vector_length(const MachNode* use, MachOper* opnd) { 1176 uint def_idx = use->operand_index(opnd); 1177 Node* def = use->in(def_idx); 1178 return def->bottom_type()->is_vect()->length(); 1179 } 1180 1181 inline uint vector_length_in_bytes(const Node* n) { 1182 const TypeVect* vt = n->bottom_type()->is_vect(); 1183 return vt->length_in_bytes(); 1184 } 1185 1186 inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { 1187 uint def_idx = use->operand_index(opnd); 1188 Node* def = use->in(def_idx); 1189 return def->bottom_type()->is_vect()->length_in_bytes(); 1190 } 1191 1192 inline BasicType vector_element_basic_type(const Node *n) { 1193 return n->bottom_type()->is_vect()->element_basic_type(); 1194 } 1195 1196 inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) { 1197 uint def_idx = use->operand_index(opnd); 1198 Node* def = use->in(def_idx); 1199 return def->bottom_type()->is_vect()->element_basic_type(); 1200 } 1201 1202 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1203 switch(bytes) { 1204 case 4: // fall-through 1205 case 8: // fall-through 1206 case 16: return Assembler::AVX_128bit; 1207 case 32: return Assembler::AVX_256bit; 1208 case 64: return Assembler::AVX_512bit; 1209 1210 default: { 1211 ShouldNotReachHere(); 1212 return Assembler::AVX_NoVec; 1213 } 1214 } 1215 } 1216 1217 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1218 return vector_length_encoding(vector_length_in_bytes(n)); 1219 } 1220 1221 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1222 uint def_idx =
use->operand_index(opnd); 1223 Node* def = use->in(def_idx); 1224 return vector_length_encoding(def); 1225 } 1226 1227 %} // end source_hpp 1228 1229 source %{ 1230 1231 #include "opto/addnode.hpp" 1232 1233 // Emit exception handler code. 1234 // Stuff framesize into a register and call a VM stub routine. 1235 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1236 1237 // Note that the code buffer's insts_mark is always relative to insts. 1238 // That's why we must use the macroassembler to generate a handler. 1239 C2_MacroAssembler _masm(&cbuf); 1240 address base = __ start_a_stub(size_exception_handler()); 1241 if (base == NULL) { 1242 ciEnv::current()->record_failure("CodeCache is full"); 1243 return 0; // CodeBuffer::expand failed 1244 } 1245 int offset = __ offset(); 1246 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1247 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1248 __ end_a_stub(); 1249 return offset; 1250 } 1251 1252 // Emit deopt handler code. 1253 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1254 1255 // Note that the code buffer's insts_mark is always relative to insts. 1256 // That's why we must use the macroassembler to generate a handler. 1257 C2_MacroAssembler _masm(&cbuf); 1258 address base = __ start_a_stub(size_deopt_handler()); 1259 if (base == NULL) { 1260 ciEnv::current()->record_failure("CodeCache is full"); 1261 return 0; // CodeBuffer::expand failed 1262 } 1263 int offset = __ offset(); 1264 1265 #ifdef _LP64 1266 address the_pc = (address) __ pc(); 1267 Label next; 1268 // push a "the_pc" on the stack without destroying any registers 1269 // as they all may be live. 1270 1271 // push address of "next" 1272 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1273 __ bind(next); 1274 // adjust it so it matches "the_pc" 1275 __ subptr(Address(rsp, 0), __ offset() - offset); 1276 #else 1277 InternalAddress here(__ pc()); 1278 __ pushptr(here.addr()); 1279 #endif 1280 1281 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1282 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1283 __ end_a_stub(); 1284 return offset; 1285 } 1286 1287 Assembler::Width widthForType(BasicType bt) { 1288 if (bt == T_BYTE) { 1289 return Assembler::B; 1290 } else if (bt == T_SHORT) { 1291 return Assembler::W; 1292 } else if (bt == T_INT) { 1293 return Assembler::D; 1294 } else { 1295 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1296 return Assembler::Q; 1297 } 1298 } 1299 1300 //============================================================================= 1301 1302 // Float masks come from different places depending on platform. 
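// (Editor's note, illustrative only: these masks feed the scalar abs/negate patterns later
// in this file. For example, absF_reg below ANDs with the 0x7fffffff sign mask to clear the
// sign bit of a float; the signflip masks carry only the sign bit (0x80000000 per 32-bit
// lane) and are the XOR counterparts used to flip it.)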
1303 #ifdef _LP64 1304 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1305 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1306 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1307 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1308 #else 1309 static address float_signmask() { return (address)float_signmask_pool; } 1310 static address float_signflip() { return (address)float_signflip_pool; } 1311 static address double_signmask() { return (address)double_signmask_pool; } 1312 static address double_signflip() { return (address)double_signflip_pool; } 1313 #endif 1314 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1315 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1316 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1317 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1318 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1319 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1320 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1321 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1322 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1323 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1324 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1325 1326 //============================================================================= 1327 const bool Matcher::match_rule_supported(int opcode) { 1328 if (!has_match_rule(opcode)) { 1329 return false; // no match rule present 1330 } 1331 switch (opcode) { 1332 case Op_AbsVL: 1333 case Op_StoreVectorScatter: 1334 if (UseAVX < 3) { 1335 return false; 1336 } 1337 break; 1338 case Op_PopCountI: 1339 case Op_PopCountL: 1340 if (!UsePopCountInstruction) { 1341 return false; 1342 } 1343 break; 1344 case Op_PopCountVI: 1345 if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) { 1346 return false; 1347 } 1348 break; 1349 case Op_MulVI: 1350 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1351 return false; 1352 } 1353 break; 1354 case Op_MulVL: 1355 if (UseSSE < 4) { // only with SSE4_1 or AVX 1356 return false; 1357 } 1358 break; 1359 case Op_MulReductionVL: 1360 if (VM_Version::supports_avx512dq() == false) { 1361 return false; 1362 } 1363 break; 1364 case Op_AddReductionVL: 1365 if (UseSSE < 2) { // requires at least SSE2 1366 return false; 1367 } 1368 break; 1369 case Op_AbsVB: 1370 case Op_AbsVS: 1371 case Op_AbsVI: 1372 case Op_AddReductionVI: 1373 case Op_AndReductionV: 1374 case Op_OrReductionV: 1375 case Op_XorReductionV: 1376 if (UseSSE < 3) { // requires at least SSSE3 1377 return false; 1378 } 1379 break; 1380 case Op_VectorLoadShuffle: 1381 case Op_VectorRearrange: 1382 case Op_MulReductionVI: 1383 if (UseSSE < 4) { // requires at least SSE4 1384 return false; 1385 } 1386 break; 1387 case Op_SqrtVD: 1388 case Op_SqrtVF: 1389 case Op_VectorMaskCmp: 1390 case Op_VectorCastB2X: 1391 case Op_VectorCastS2X: 1392 case Op_VectorCastI2X: 1393 case Op_VectorCastL2X: 1394 case 
Op_VectorCastF2X: 1395 case Op_VectorCastD2X: 1396 if (UseAVX < 1) { // enabled for AVX only 1397 return false; 1398 } 1399 break; 1400 case Op_CompareAndSwapL: 1401 #ifdef _LP64 1402 case Op_CompareAndSwapP: 1403 #endif 1404 if (!VM_Version::supports_cx8()) { 1405 return false; 1406 } 1407 break; 1408 case Op_CMoveVF: 1409 case Op_CMoveVD: 1410 if (UseAVX < 1) { // enabled for AVX only 1411 return false; 1412 } 1413 break; 1414 case Op_StrIndexOf: 1415 if (!UseSSE42Intrinsics) { 1416 return false; 1417 } 1418 break; 1419 case Op_StrIndexOfChar: 1420 if (!UseSSE42Intrinsics) { 1421 return false; 1422 } 1423 break; 1424 case Op_OnSpinWait: 1425 if (VM_Version::supports_on_spin_wait() == false) { 1426 return false; 1427 } 1428 break; 1429 case Op_MulVB: 1430 case Op_LShiftVB: 1431 case Op_RShiftVB: 1432 case Op_URShiftVB: 1433 case Op_VectorInsert: 1434 case Op_VectorLoadMask: 1435 case Op_VectorStoreMask: 1436 case Op_VectorBlend: 1437 if (UseSSE < 4) { 1438 return false; 1439 } 1440 break; 1441 #ifdef _LP64 1442 case Op_MaxD: 1443 case Op_MaxF: 1444 case Op_MinD: 1445 case Op_MinF: 1446 if (UseAVX < 1) { // enabled for AVX only 1447 return false; 1448 } 1449 break; 1450 #endif 1451 case Op_CacheWB: 1452 case Op_CacheWBPreSync: 1453 case Op_CacheWBPostSync: 1454 if (!VM_Version::supports_data_cache_line_flush()) { 1455 return false; 1456 } 1457 break; 1458 case Op_ExtractB: 1459 case Op_ExtractL: 1460 case Op_ExtractI: 1461 case Op_RoundDoubleMode: 1462 if (UseSSE < 4) { 1463 return false; 1464 } 1465 break; 1466 case Op_RoundDoubleModeV: 1467 if (VM_Version::supports_avx() == false) { 1468 return false; // 128bit vroundpd is not available 1469 } 1470 break; 1471 case Op_VLShiftV: 1472 case Op_VRShiftV: 1473 case Op_VURShiftV: 1474 case Op_LoadVectorGather: 1475 if (UseAVX < 2) { 1476 return false; 1477 } 1478 break; 1479 case Op_FmaVD: 1480 case Op_FmaVF: 1481 if (!UseFMA) { 1482 return false; 1483 } 1484 break; 1485 #ifndef _LP64 1486 case Op_AddReductionVF: 1487 case Op_AddReductionVD: 1488 case Op_MulReductionVF: 1489 case Op_MulReductionVD: 1490 if (UseSSE < 1) { // requires at least SSE 1491 return false; 1492 } 1493 break; 1494 case Op_MulAddVS2VI: 1495 case Op_RShiftVL: 1496 case Op_AbsVD: 1497 case Op_NegVD: 1498 if (UseSSE < 2) { 1499 return false; 1500 } 1501 break; 1502 #endif // !LP64 1503 } 1504 return true; // Match rules are supported by default. 1505 } 1506 1507 //------------------------------------------------------------------------ 1508 1509 // Identify extra cases that we might want to provide match rules for vector nodes and 1510 // other intrinsics guarded with vector length (vlen) and element type (bt). 1511 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1512 if (!match_rule_supported(opcode)) { 1513 return false; 1514 } 1515 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1516 // * SSE2 supports 128bit vectors for all types; 1517 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1518 // * AVX2 supports 256bit vectors for all types; 1519 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1520 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1521 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1522 // And MaxVectorSize is taken into account as well. 
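// (Worked example for the size check below, added for clarity: with bt == T_INT and vlen == 8
//  the vector is 8 * 4 bytes * 8 = 256 bits, which per the table above needs at least AVX2,
//  since AVX1 only covers 256-bit FLOAT/DOUBLE vectors; with bt == T_DOUBLE and vlen == 8 it
//  is 512 bits and needs AVX512F.)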
1523 1524 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1525 if (!vector_size_supported(bt, vlen)) { 1526 return false; 1527 } 1528 // Special cases which require vector length follow: 1529 // * implementation limitations 1530 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1531 // * 128bit vroundpd instruction is present only in AVX1 1532 switch (opcode) { 1533 case Op_AbsVF: 1534 case Op_NegVF: 1535 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1536 return false; // 512bit vandps and vxorps are not available 1537 } 1538 break; 1539 case Op_AbsVD: 1540 case Op_NegVD: 1541 case Op_MulVL: 1542 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1543 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1544 } 1545 break; 1546 case Op_CMoveVF: 1547 if (vlen != 8) { 1548 return false; // implementation limitation (only vcmov8F_reg is present) 1549 } 1550 break; 1551 case Op_CMoveVD: 1552 if (vlen != 4) { 1553 return false; // implementation limitation (only vcmov4D_reg is present) 1554 } 1555 break; 1556 case Op_MaxV: 1557 case Op_MinV: 1558 if (UseSSE < 4 && is_integral_type(bt)) { 1559 return false; 1560 } 1561 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1562 // Float/Double intrinsics are enabled for AVX family currently. 1563 if (UseAVX == 0) { 1564 return false; 1565 } 1566 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1567 return false; 1568 } 1569 } 1570 break; 1571 case Op_AddReductionVI: 1572 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1573 return false; 1574 } 1575 // fallthrough 1576 case Op_AndReductionV: 1577 case Op_OrReductionV: 1578 case Op_XorReductionV: 1579 if (is_subword_type(bt) && (UseSSE < 4)) { 1580 return false; 1581 } 1582 #ifndef _LP64 1583 if (bt == T_BYTE || bt == T_LONG) { 1584 return false; 1585 } 1586 #endif 1587 break; 1588 #ifndef _LP64 1589 case Op_VectorInsert: 1590 if (bt == T_LONG || bt == T_DOUBLE) { 1591 return false; 1592 } 1593 break; 1594 #endif 1595 case Op_MinReductionV: 1596 case Op_MaxReductionV: 1597 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1598 return false; 1599 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1600 return false; 1601 } 1602 // Float/Double intrinsics enabled for AVX family. 
1603 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1604 return false; 1605 } 1606 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1607 return false; 1608 } 1609 #ifndef _LP64 1610 if (bt == T_BYTE || bt == T_LONG) { 1611 return false; 1612 } 1613 #endif 1614 break; 1615 case Op_VectorTest: 1616 if (UseSSE < 4) { 1617 return false; // Implementation limitation 1618 } else if (size_in_bits < 128) { 1619 return false; // Implementation limitation 1620 } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) { 1621 return false; // Implementation limitation 1622 } 1623 break; 1624 case Op_VectorLoadShuffle: 1625 case Op_VectorRearrange: 1626 if(vlen == 2) { 1627 return false; // Implementation limitation due to how shuffle is loaded 1628 } else if (size_in_bits == 256 && UseAVX < 2) { 1629 return false; // Implementation limitation 1630 } else if (bt == T_BYTE && size_in_bits >= 256 && !VM_Version::supports_avx512_vbmi()) { 1631 return false; // Implementation limitation 1632 } else if (bt == T_SHORT && size_in_bits >= 256 && !VM_Version::supports_avx512bw()) { 1633 return false; // Implementation limitation 1634 } 1635 break; 1636 case Op_VectorLoadMask: 1637 if (size_in_bits == 256 && UseAVX < 2) { 1638 return false; // Implementation limitation 1639 } 1640 // fallthrough 1641 case Op_VectorStoreMask: 1642 if (vlen == 2) { 1643 return false; // Implementation limitation 1644 } 1645 break; 1646 case Op_VectorCastB2X: 1647 if (size_in_bits == 256 && UseAVX < 2) { 1648 return false; // Implementation limitation 1649 } 1650 break; 1651 case Op_VectorCastS2X: 1652 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1653 return false; 1654 } 1655 break; 1656 case Op_VectorCastI2X: 1657 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1658 return false; 1659 } 1660 break; 1661 case Op_VectorCastL2X: 1662 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1663 return false; 1664 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1665 return false; 1666 } 1667 break; 1668 case Op_VectorCastF2X: 1669 case Op_VectorCastD2X: 1670 if (is_integral_type(bt)) { 1671 // Casts from FP to integral types require special fixup logic not easily 1672 // implementable with vectors. 1673 return false; // Implementation limitation 1674 } 1675 case Op_MulReductionVI: 1676 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1677 return false; 1678 } 1679 break; 1680 case Op_StoreVectorScatter: 1681 if(bt == T_BYTE || bt == T_SHORT) { 1682 return false; 1683 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1684 return false; 1685 } 1686 // fallthrough 1687 case Op_LoadVectorGather: 1688 if (size_in_bits == 64 ) { 1689 return false; 1690 } 1691 break; 1692 } 1693 return true; // Per default match rules are supported. 1694 } 1695 1696 // x86 supports generic vector operands: vec and legVec. 1697 const bool Matcher::supports_generic_vector_operands = true; 1698 1699 MachOper* Matcher::specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 1700 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 1701 bool legacy = (generic_opnd->opcode() == LEGVEC); 1702 if (!VM_Version::supports_avx512vlbwdq() && // KNL 1703 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 1704 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
1705 return new legVecZOper(); 1706 } 1707 if (legacy) { 1708 switch (ideal_reg) { 1709 case Op_VecS: return new legVecSOper(); 1710 case Op_VecD: return new legVecDOper(); 1711 case Op_VecX: return new legVecXOper(); 1712 case Op_VecY: return new legVecYOper(); 1713 case Op_VecZ: return new legVecZOper(); 1714 } 1715 } else { 1716 switch (ideal_reg) { 1717 case Op_VecS: return new vecSOper(); 1718 case Op_VecD: return new vecDOper(); 1719 case Op_VecX: return new vecXOper(); 1720 case Op_VecY: return new vecYOper(); 1721 case Op_VecZ: return new vecZOper(); 1722 } 1723 } 1724 ShouldNotReachHere(); 1725 return NULL; 1726 } 1727 1728 bool Matcher::is_generic_reg2reg_move(MachNode* m) { 1729 switch (m->rule()) { 1730 case MoveVec2Leg_rule: 1731 case MoveLeg2Vec_rule: 1732 return true; 1733 default: 1734 return false; 1735 } 1736 } 1737 1738 bool Matcher::is_generic_vector(MachOper* opnd) { 1739 switch (opnd->opcode()) { 1740 case VEC: 1741 case LEGVEC: 1742 return true; 1743 default: 1744 return false; 1745 } 1746 } 1747 1748 //------------------------------------------------------------------------ 1749 1750 const bool Matcher::has_predicated_vectors(void) { 1751 bool ret_value = false; 1752 if (UseAVX > 2) { 1753 ret_value = VM_Version::supports_avx512vl(); 1754 } 1755 1756 return ret_value; 1757 } 1758 1759 const int Matcher::float_pressure(int default_pressure_threshold) { 1760 int float_pressure_threshold = default_pressure_threshold; 1761 #ifdef _LP64 1762 if (UseAVX > 2) { 1763 // Increase pressure threshold on machines with AVX3 which have 1764 // 2x more XMM registers. 1765 float_pressure_threshold = default_pressure_threshold * 2; 1766 } 1767 #endif 1768 return float_pressure_threshold; 1769 } 1770 1771 // Max vector size in bytes. 0 if not supported. 1772 const int Matcher::vector_width_in_bytes(BasicType bt) { 1773 assert(is_java_primitive(bt), "only primitive type vectors"); 1774 if (UseSSE < 2) return 0; 1775 // SSE2 supports 128bit vectors for all types. 1776 // AVX2 supports 256bit vectors for all types. 1777 // AVX2/EVEX supports 512bit vectors for all types. 1778 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1779 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1780 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1781 size = (UseAVX > 2) ? 64 : 32; 1782 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 1783 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 1784 // Use flag to limit vector size. 1785 size = MIN2(size,(int)MaxVectorSize); 1786 // Minimum 2 values in vector (or 4 for bytes). 1787 switch (bt) { 1788 case T_DOUBLE: 1789 case T_LONG: 1790 if (size < 16) return 0; 1791 break; 1792 case T_FLOAT: 1793 case T_INT: 1794 if (size < 8) return 0; 1795 break; 1796 case T_BOOLEAN: 1797 if (size < 4) return 0; 1798 break; 1799 case T_CHAR: 1800 if (size < 4) return 0; 1801 break; 1802 case T_BYTE: 1803 if (size < 4) return 0; 1804 break; 1805 case T_SHORT: 1806 if (size < 4) return 0; 1807 break; 1808 default: 1809 ShouldNotReachHere(); 1810 } 1811 return size; 1812 } 1813 1814 // Limits on vector size (number of elements) loaded into vector. 1815 const int Matcher::max_vector_size(const BasicType bt) { 1816 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1817 } 1818 const int Matcher::min_vector_size(const BasicType bt) { 1819 int max_size = max_vector_size(bt); 1820 // Min size which can be loaded into vector is 4 bytes. 1821 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 1822 return MIN2(size,max_size); 1823 } 1824 1825 // Vector ideal reg corresponding to specified size in bytes 1826 const uint Matcher::vector_ideal_reg(int size) { 1827 assert(MaxVectorSize >= size, ""); 1828 switch(size) { 1829 case 4: return Op_VecS; 1830 case 8: return Op_VecD; 1831 case 16: return Op_VecX; 1832 case 32: return Op_VecY; 1833 case 64: return Op_VecZ; 1834 } 1835 ShouldNotReachHere(); 1836 return 0; 1837 } 1838 1839 // Only lowest bits of xmm reg are used for vector shift count. 1840 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1841 return Op_VecS; 1842 } 1843 1844 // x86 supports misaligned vectors store/load. 1845 const bool Matcher::misaligned_vectors_ok() { 1846 return true; 1847 } 1848 1849 // x86 AES instructions are compatible with SunJCE expanded 1850 // keys, hence we do not need to pass the original key to stubs 1851 const bool Matcher::pass_original_key_for_aes() { 1852 return false; 1853 } 1854 1855 1856 const bool Matcher::convi2l_type_required = true; 1857 1858 // Check for shift by small constant as well 1859 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1860 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1861 shift->in(2)->get_int() <= 3 && 1862 // Are there other uses besides address expressions? 1863 !matcher->is_visited(shift)) { 1864 address_visited.set(shift->_idx); // Flag as address_visited 1865 mstack.push(shift->in(2), Matcher::Visit); 1866 Node *conv = shift->in(1); 1867 #ifdef _LP64 1868 // Allow Matcher to match the rule which bypass 1869 // ConvI2L operation for an array index on LP64 1870 // if the index value is positive. 1871 if (conv->Opcode() == Op_ConvI2L && 1872 conv->as_Type()->type()->is_long()->_lo >= 0 && 1873 // Are there other uses besides address expressions? 1874 !matcher->is_visited(conv)) { 1875 address_visited.set(conv->_idx); // Flag as address_visited 1876 mstack.push(conv->in(1), Matcher::Pre_Visit); 1877 } else 1878 #endif 1879 mstack.push(conv, Matcher::Pre_Visit); 1880 return true; 1881 } 1882 return false; 1883 } 1884 1885 // Should the Matcher clone shifts on addressing modes, expecting them 1886 // to be subsumed into complex addressing expressions or compute them 1887 // into registers? 1888 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1889 Node *off = m->in(AddPNode::Offset); 1890 if (off->is_Con()) { 1891 address_visited.test_set(m->_idx); // Flag as address_visited 1892 Node *adr = m->in(AddPNode::Address); 1893 1894 // Intel can handle 2 adds in addressing mode 1895 // AtomicAdd is not an addressing expression. 1896 // Cheap to find it by looking for screwy base. 1897 if (adr->is_AddP() && 1898 !adr->in(AddPNode::Base)->is_top() && 1899 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 1900 // Are there other uses besides address expressions? 
1901 !is_visited(adr)) { 1902 address_visited.set(adr->_idx); // Flag as address_visited 1903 Node *shift = adr->in(AddPNode::Offset); 1904 if (!clone_shift(shift, this, mstack, address_visited)) { 1905 mstack.push(shift, Pre_Visit); 1906 } 1907 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1908 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1909 } else { 1910 mstack.push(adr, Pre_Visit); 1911 } 1912 1913 // Clone X+offset as it also folds into most addressing expressions 1914 mstack.push(off, Visit); 1915 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1916 return true; 1917 } else if (clone_shift(off, this, mstack, address_visited)) { 1918 address_visited.test_set(m->_idx); // Flag as address_visited 1919 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1920 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1921 return true; 1922 } 1923 return false; 1924 } 1925 1926 void Compile::reshape_address(AddPNode* addp) { 1927 } 1928 1929 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 1930 switch (bt) { 1931 case BoolTest::eq: return Assembler::eq; 1932 case BoolTest::ne: return Assembler::neq; 1933 case BoolTest::le: return Assembler::le; 1934 case BoolTest::ge: return Assembler::nlt; 1935 case BoolTest::lt: return Assembler::lt; 1936 case BoolTest::gt: return Assembler::nle; 1937 default : ShouldNotReachHere(); return Assembler::_false; 1938 } 1939 } 1940 1941 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 1942 switch (bt) { 1943 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 1944 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 1945 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 1946 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 1947 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 1948 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 1949 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 1950 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 1951 } 1952 } 1953 1954 // Helper methods for MachSpillCopyNode::implementation(). 1955 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1956 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1957 // In 64-bit VM size calculation is very complex. Emitting instructions 1958 // into scratch buffer is used to get size in 64-bit VM. 
1959 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1960 assert(ireg == Op_VecS || // 32bit vector 1961 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1962 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1963 "no non-adjacent vector moves" ); 1964 if (cbuf) { 1965 C2_MacroAssembler _masm(cbuf); 1966 int offset = __ offset(); 1967 switch (ireg) { 1968 case Op_VecS: // copy whole register 1969 case Op_VecD: 1970 case Op_VecX: 1971 #ifndef _LP64 1972 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1973 #else 1974 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1975 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1976 } else { 1977 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1978 } 1979 #endif 1980 break; 1981 case Op_VecY: 1982 #ifndef _LP64 1983 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1984 #else 1985 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1986 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1987 } else { 1988 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1989 } 1990 #endif 1991 break; 1992 case Op_VecZ: 1993 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1994 break; 1995 default: 1996 ShouldNotReachHere(); 1997 } 1998 int size = __ offset() - offset; 1999 #ifdef ASSERT 2000 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2001 assert(!do_size || size == 4, "incorrect size calculation"); 2002 #endif 2003 return size; 2004 #ifndef PRODUCT 2005 } else if (!do_size) { 2006 switch (ireg) { 2007 case Op_VecS: 2008 case Op_VecD: 2009 case Op_VecX: 2010 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2011 break; 2012 case Op_VecY: 2013 case Op_VecZ: 2014 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2015 break; 2016 default: 2017 ShouldNotReachHere(); 2018 } 2019 #endif 2020 } 2021 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 2022 return (UseAVX > 2) ? 6 : 4; 2023 } 2024 2025 int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 2026 int stack_offset, int reg, uint ireg, outputStream* st) { 2027 // In 64-bit VM size calculation is very complex. Emitting instructions 2028 // into scratch buffer is used to get size in 64-bit VM.
2029 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 2030 if (cbuf) { 2031 C2_MacroAssembler _masm(cbuf); 2032 int offset = __ offset(); 2033 if (is_load) { 2034 switch (ireg) { 2035 case Op_VecS: 2036 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2037 break; 2038 case Op_VecD: 2039 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2040 break; 2041 case Op_VecX: 2042 #ifndef _LP64 2043 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2044 #else 2045 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2046 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2047 } else { 2048 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2049 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2050 } 2051 #endif 2052 break; 2053 case Op_VecY: 2054 #ifndef _LP64 2055 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2056 #else 2057 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2058 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2059 } else { 2060 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2061 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2062 } 2063 #endif 2064 break; 2065 case Op_VecZ: 2066 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2067 break; 2068 default: 2069 ShouldNotReachHere(); 2070 } 2071 } else { // store 2072 switch (ireg) { 2073 case Op_VecS: 2074 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2075 break; 2076 case Op_VecD: 2077 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2078 break; 2079 case Op_VecX: 2080 #ifndef _LP64 2081 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2082 #else 2083 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2084 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2085 } 2086 else { 2087 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2088 } 2089 #endif 2090 break; 2091 case Op_VecY: 2092 #ifndef _LP64 2093 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2094 #else 2095 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2096 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2097 } 2098 else { 2099 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2100 } 2101 #endif 2102 break; 2103 case Op_VecZ: 2104 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2105 break; 2106 default: 2107 ShouldNotReachHere(); 2108 } 2109 } 2110 int size = __ offset() - offset; 2111 #ifdef ASSERT 2112 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 2113 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
2114 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 2115 #endif 2116 return size; 2117 #ifndef PRODUCT 2118 } else if (!do_size) { 2119 if (is_load) { 2120 switch (ireg) { 2121 case Op_VecS: 2122 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2123 break; 2124 case Op_VecD: 2125 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2126 break; 2127 case Op_VecX: 2128 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2129 break; 2130 case Op_VecY: 2131 case Op_VecZ: 2132 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2133 break; 2134 default: 2135 ShouldNotReachHere(); 2136 } 2137 } else { // store 2138 switch (ireg) { 2139 case Op_VecS: 2140 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2141 break; 2142 case Op_VecD: 2143 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2144 break; 2145 case Op_VecX: 2146 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2147 break; 2148 case Op_VecY: 2149 case Op_VecZ: 2150 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2151 break; 2152 default: 2153 ShouldNotReachHere(); 2154 } 2155 } 2156 #endif 2157 } 2158 bool is_single_byte = false; 2159 int vec_len = 0; 2160 if ((UseAVX > 2) && (stack_offset != 0)) { 2161 int tuple_type = Assembler::EVEX_FVM; 2162 int input_size = Assembler::EVEX_32bit; 2163 switch (ireg) { 2164 case Op_VecS: 2165 tuple_type = Assembler::EVEX_T1S; 2166 break; 2167 case Op_VecD: 2168 tuple_type = Assembler::EVEX_T1S; 2169 input_size = Assembler::EVEX_64bit; 2170 break; 2171 case Op_VecX: 2172 break; 2173 case Op_VecY: 2174 vec_len = 1; 2175 break; 2176 case Op_VecZ: 2177 vec_len = 2; 2178 break; 2179 } 2180 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2181 } 2182 int offset_size = 0; 2183 int size = 5; 2184 if (UseAVX > 2 ) { 2185 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2186 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2187 size += 2; // Need an additional two bytes for EVEX encoding 2188 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2189 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2190 } else { 2191 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2192 size += 2; // Need an additional two bytes for EVEX encoding 2193 } 2194 } else { 2195 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2196 } 2197 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2198 return size+offset_size; 2199 } 2200 2201 static inline jint replicate4_imm(int con, int width) { 2202 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2203 assert(width == 1 || width == 2, "only byte or short types here"); 2204 int bit_width = width * 8; 2205 jint val = con; 2206 val &= (1 << bit_width) - 1; // mask off sign bits 2207 while(bit_width < 32) { 2208 val |= (val << bit_width); 2209 bit_width <<= 1; 2210 } 2211 return val; 2212 } 2213 2214 static inline jlong replicate8_imm(int con, int width) { 2215 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
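// (Illustration, same scheme as replicate4_imm above: the constant is masked down to "width"
//  bytes and then OR-ed into every lane of the result; e.g. replicate4_imm(0x8F, 1) yields
//  0x8F8F8F8F and replicate8_imm(0x1234, 2) yields 0x1234123412341234.)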
2216 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2217 int bit_width = width * 8; 2218 jlong val = con; 2219 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2220 while(bit_width < 64) { 2221 val |= (val << bit_width); 2222 bit_width <<= 1; 2223 } 2224 return val; 2225 } 2226 2227 #ifndef PRODUCT 2228 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2229 st->print("nop \t# %d bytes pad for loops and calls", _count); 2230 } 2231 #endif 2232 2233 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2234 C2_MacroAssembler _masm(&cbuf); 2235 __ nop(_count); 2236 } 2237 2238 uint MachNopNode::size(PhaseRegAlloc*) const { 2239 return _count; 2240 } 2241 2242 #ifndef PRODUCT 2243 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2244 st->print("# breakpoint"); 2245 } 2246 #endif 2247 2248 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2249 C2_MacroAssembler _masm(&cbuf); 2250 __ int3(); 2251 } 2252 2253 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2254 return MachNode::size(ra_); 2255 } 2256 2257 %} 2258 2259 encode %{ 2260 2261 enc_class call_epilog %{ 2262 if (VerifyStackAtCalls) { 2263 // Check that stack depth is unchanged: find majik cookie on stack 2264 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2265 C2_MacroAssembler _masm(&cbuf); 2266 Label L; 2267 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2268 __ jccb(Assembler::equal, L); 2269 // Die if stack mismatch 2270 __ int3(); 2271 __ bind(L); 2272 } 2273 %} 2274 2275 %} 2276 2277 // Operands for bound floating point register arguments 2278 operand rxmm0() %{ 2279 constraint(ALLOC_IN_RC(xmm0_reg)); 2280 match(VecX); 2281 format%{%} 2282 interface(REG_INTER); 2283 %} 2284 2285 //----------OPERANDS----------------------------------------------------------- 2286 // Operand definitions must precede instruction definitions for correct parsing 2287 // in the ADLC because operands constitute user defined types which are used in 2288 // instruction definitions. 2289 2290 // Vectors 2291 2292 // Dummy generic vector class. Should be used for all vector operands. 2293 // Replaced with vec[SDXYZ] during post-selection pass. 2294 operand vec() %{ 2295 constraint(ALLOC_IN_RC(dynamic)); 2296 match(VecX); 2297 match(VecY); 2298 match(VecZ); 2299 match(VecS); 2300 match(VecD); 2301 2302 format %{ %} 2303 interface(REG_INTER); 2304 %} 2305 2306 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2307 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2308 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2309 // runtime code generation via reg_class_dynamic. 2310 operand legVec() %{ 2311 constraint(ALLOC_IN_RC(dynamic)); 2312 match(VecX); 2313 match(VecY); 2314 match(VecZ); 2315 match(VecS); 2316 match(VecD); 2317 2318 format %{ %} 2319 interface(REG_INTER); 2320 %} 2321 2322 // Replaces vec during post-selection cleanup. See above. 2323 operand vecS() %{ 2324 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2325 match(VecS); 2326 2327 format %{ %} 2328 interface(REG_INTER); 2329 %} 2330 2331 // Replaces legVec during post-selection cleanup. See above. 2332 operand legVecS() %{ 2333 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2334 match(VecS); 2335 2336 format %{ %} 2337 interface(REG_INTER); 2338 %} 2339 2340 // Replaces vec during post-selection cleanup. See above.
2341 operand vecD() %{ 2342 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2343 match(VecD); 2344 2345 format %{ %} 2346 interface(REG_INTER); 2347 %} 2348 2349 // Replaces legVec during post-selection cleanup. See above. 2350 operand legVecD() %{ 2351 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2352 match(VecD); 2353 2354 format %{ %} 2355 interface(REG_INTER); 2356 %} 2357 2358 // Replaces vec during post-selection cleanup. See above. 2359 operand vecX() %{ 2360 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2361 match(VecX); 2362 2363 format %{ %} 2364 interface(REG_INTER); 2365 %} 2366 2367 // Replaces legVec during post-selection cleanup. See above. 2368 operand legVecX() %{ 2369 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2370 match(VecX); 2371 2372 format %{ %} 2373 interface(REG_INTER); 2374 %} 2375 2376 // Replaces vec during post-selection cleanup. See above. 2377 operand vecY() %{ 2378 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2379 match(VecY); 2380 2381 format %{ %} 2382 interface(REG_INTER); 2383 %} 2384 2385 // Replaces legVec during post-selection cleanup. See above. 2386 operand legVecY() %{ 2387 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2388 match(VecY); 2389 2390 format %{ %} 2391 interface(REG_INTER); 2392 %} 2393 2394 // Replaces vec during post-selection cleanup. See above. 2395 operand vecZ() %{ 2396 constraint(ALLOC_IN_RC(vectorz_reg)); 2397 match(VecZ); 2398 2399 format %{ %} 2400 interface(REG_INTER); 2401 %} 2402 2403 // Replaces legVec during post-selection cleanup. See above. 2404 operand legVecZ() %{ 2405 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2406 match(VecZ); 2407 2408 format %{ %} 2409 interface(REG_INTER); 2410 %} 2411 2412 // Comparison Code for FP conditional move 2413 operand cmpOp_vcmppd() %{ 2414 match(Bool); 2415 2416 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2417 n->as_Bool()->_test._test != BoolTest::no_overflow); 2418 format %{ "" %} 2419 interface(COND_INTER) %{ 2420 equal (0x0, "eq"); 2421 less (0x1, "lt"); 2422 less_equal (0x2, "le"); 2423 not_equal (0xC, "ne"); 2424 greater_equal(0xD, "ge"); 2425 greater (0xE, "gt"); 2426 //TODO cannot compile (adlc breaks) without two next lines with error: 2427 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2428 // equal' for overflow. 
2429 overflow (0x20, "o"); // not really supported by the instruction 2430 no_overflow (0x21, "no"); // not really supported by the instruction 2431 %} 2432 %} 2433 2434 2435 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2436 2437 // ============================================================================ 2438 2439 instruct ShouldNotReachHere() %{ 2440 match(Halt); 2441 format %{ "ud2\t# ShouldNotReachHere" %} 2442 ins_encode %{ 2443 __ stop(_halt_reason); 2444 %} 2445 ins_pipe(pipe_slow); 2446 %} 2447 2448 // =================================EVEX special=============================== 2449 2450 instruct setMask(rRegI dst, rRegI src) %{ 2451 predicate(Matcher::has_predicated_vectors()); 2452 match(Set dst (SetVectMaskI src)); 2453 effect(TEMP dst); 2454 format %{ "setvectmask $dst, $src" %} 2455 ins_encode %{ 2456 __ setvectmask($dst$$Register, $src$$Register); 2457 %} 2458 ins_pipe(pipe_slow); 2459 %} 2460 2461 // ============================================================================ 2462 2463 instruct addF_reg(regF dst, regF src) %{ 2464 predicate((UseSSE>=1) && (UseAVX == 0)); 2465 match(Set dst (AddF dst src)); 2466 2467 format %{ "addss $dst, $src" %} 2468 ins_cost(150); 2469 ins_encode %{ 2470 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2471 %} 2472 ins_pipe(pipe_slow); 2473 %} 2474 2475 instruct addF_mem(regF dst, memory src) %{ 2476 predicate((UseSSE>=1) && (UseAVX == 0)); 2477 match(Set dst (AddF dst (LoadF src))); 2478 2479 format %{ "addss $dst, $src" %} 2480 ins_cost(150); 2481 ins_encode %{ 2482 __ addss($dst$$XMMRegister, $src$$Address); 2483 %} 2484 ins_pipe(pipe_slow); 2485 %} 2486 2487 instruct addF_imm(regF dst, immF con) %{ 2488 predicate((UseSSE>=1) && (UseAVX == 0)); 2489 match(Set dst (AddF dst con)); 2490 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2491 ins_cost(150); 2492 ins_encode %{ 2493 __ addss($dst$$XMMRegister, $constantaddress($con)); 2494 %} 2495 ins_pipe(pipe_slow); 2496 %} 2497 2498 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2499 predicate(UseAVX > 0); 2500 match(Set dst (AddF src1 src2)); 2501 2502 format %{ "vaddss $dst, $src1, $src2" %} 2503 ins_cost(150); 2504 ins_encode %{ 2505 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2506 %} 2507 ins_pipe(pipe_slow); 2508 %} 2509 2510 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2511 predicate(UseAVX > 0); 2512 match(Set dst (AddF src1 (LoadF src2))); 2513 2514 format %{ "vaddss $dst, $src1, $src2" %} 2515 ins_cost(150); 2516 ins_encode %{ 2517 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2518 %} 2519 ins_pipe(pipe_slow); 2520 %} 2521 2522 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2523 predicate(UseAVX > 0); 2524 match(Set dst (AddF src con)); 2525 2526 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2527 ins_cost(150); 2528 ins_encode %{ 2529 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2530 %} 2531 ins_pipe(pipe_slow); 2532 %} 2533 2534 instruct addD_reg(regD dst, regD src) %{ 2535 predicate((UseSSE>=2) && (UseAVX == 0)); 2536 match(Set dst (AddD dst src)); 2537 2538 format %{ "addsd $dst, $src" %} 2539 ins_cost(150); 2540 ins_encode %{ 2541 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2542 %} 2543 ins_pipe(pipe_slow); 2544 %} 2545 2546 instruct addD_mem(regD dst, memory src) %{ 2547 predicate((UseSSE>=2) && (UseAVX == 0)); 2548 match(Set dst (AddD dst (LoadD 
src))); 2549 2550 format %{ "addsd $dst, $src" %} 2551 ins_cost(150); 2552 ins_encode %{ 2553 __ addsd($dst$$XMMRegister, $src$$Address); 2554 %} 2555 ins_pipe(pipe_slow); 2556 %} 2557 2558 instruct addD_imm(regD dst, immD con) %{ 2559 predicate((UseSSE>=2) && (UseAVX == 0)); 2560 match(Set dst (AddD dst con)); 2561 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2562 ins_cost(150); 2563 ins_encode %{ 2564 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2565 %} 2566 ins_pipe(pipe_slow); 2567 %} 2568 2569 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2570 predicate(UseAVX > 0); 2571 match(Set dst (AddD src1 src2)); 2572 2573 format %{ "vaddsd $dst, $src1, $src2" %} 2574 ins_cost(150); 2575 ins_encode %{ 2576 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2577 %} 2578 ins_pipe(pipe_slow); 2579 %} 2580 2581 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2582 predicate(UseAVX > 0); 2583 match(Set dst (AddD src1 (LoadD src2))); 2584 2585 format %{ "vaddsd $dst, $src1, $src2" %} 2586 ins_cost(150); 2587 ins_encode %{ 2588 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2589 %} 2590 ins_pipe(pipe_slow); 2591 %} 2592 2593 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2594 predicate(UseAVX > 0); 2595 match(Set dst (AddD src con)); 2596 2597 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2598 ins_cost(150); 2599 ins_encode %{ 2600 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2601 %} 2602 ins_pipe(pipe_slow); 2603 %} 2604 2605 instruct subF_reg(regF dst, regF src) %{ 2606 predicate((UseSSE>=1) && (UseAVX == 0)); 2607 match(Set dst (SubF dst src)); 2608 2609 format %{ "subss $dst, $src" %} 2610 ins_cost(150); 2611 ins_encode %{ 2612 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2613 %} 2614 ins_pipe(pipe_slow); 2615 %} 2616 2617 instruct subF_mem(regF dst, memory src) %{ 2618 predicate((UseSSE>=1) && (UseAVX == 0)); 2619 match(Set dst (SubF dst (LoadF src))); 2620 2621 format %{ "subss $dst, $src" %} 2622 ins_cost(150); 2623 ins_encode %{ 2624 __ subss($dst$$XMMRegister, $src$$Address); 2625 %} 2626 ins_pipe(pipe_slow); 2627 %} 2628 2629 instruct subF_imm(regF dst, immF con) %{ 2630 predicate((UseSSE>=1) && (UseAVX == 0)); 2631 match(Set dst (SubF dst con)); 2632 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2633 ins_cost(150); 2634 ins_encode %{ 2635 __ subss($dst$$XMMRegister, $constantaddress($con)); 2636 %} 2637 ins_pipe(pipe_slow); 2638 %} 2639 2640 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2641 predicate(UseAVX > 0); 2642 match(Set dst (SubF src1 src2)); 2643 2644 format %{ "vsubss $dst, $src1, $src2" %} 2645 ins_cost(150); 2646 ins_encode %{ 2647 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2648 %} 2649 ins_pipe(pipe_slow); 2650 %} 2651 2652 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2653 predicate(UseAVX > 0); 2654 match(Set dst (SubF src1 (LoadF src2))); 2655 2656 format %{ "vsubss $dst, $src1, $src2" %} 2657 ins_cost(150); 2658 ins_encode %{ 2659 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2660 %} 2661 ins_pipe(pipe_slow); 2662 %} 2663 2664 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2665 predicate(UseAVX > 0); 2666 match(Set dst (SubF src con)); 2667 2668 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2669 ins_cost(150); 2670 
ins_encode %{ 2671 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2672 %} 2673 ins_pipe(pipe_slow); 2674 %} 2675 2676 instruct subD_reg(regD dst, regD src) %{ 2677 predicate((UseSSE>=2) && (UseAVX == 0)); 2678 match(Set dst (SubD dst src)); 2679 2680 format %{ "subsd $dst, $src" %} 2681 ins_cost(150); 2682 ins_encode %{ 2683 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2684 %} 2685 ins_pipe(pipe_slow); 2686 %} 2687 2688 instruct subD_mem(regD dst, memory src) %{ 2689 predicate((UseSSE>=2) && (UseAVX == 0)); 2690 match(Set dst (SubD dst (LoadD src))); 2691 2692 format %{ "subsd $dst, $src" %} 2693 ins_cost(150); 2694 ins_encode %{ 2695 __ subsd($dst$$XMMRegister, $src$$Address); 2696 %} 2697 ins_pipe(pipe_slow); 2698 %} 2699 2700 instruct subD_imm(regD dst, immD con) %{ 2701 predicate((UseSSE>=2) && (UseAVX == 0)); 2702 match(Set dst (SubD dst con)); 2703 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2704 ins_cost(150); 2705 ins_encode %{ 2706 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2707 %} 2708 ins_pipe(pipe_slow); 2709 %} 2710 2711 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2712 predicate(UseAVX > 0); 2713 match(Set dst (SubD src1 src2)); 2714 2715 format %{ "vsubsd $dst, $src1, $src2" %} 2716 ins_cost(150); 2717 ins_encode %{ 2718 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2719 %} 2720 ins_pipe(pipe_slow); 2721 %} 2722 2723 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2724 predicate(UseAVX > 0); 2725 match(Set dst (SubD src1 (LoadD src2))); 2726 2727 format %{ "vsubsd $dst, $src1, $src2" %} 2728 ins_cost(150); 2729 ins_encode %{ 2730 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2731 %} 2732 ins_pipe(pipe_slow); 2733 %} 2734 2735 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2736 predicate(UseAVX > 0); 2737 match(Set dst (SubD src con)); 2738 2739 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2740 ins_cost(150); 2741 ins_encode %{ 2742 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2743 %} 2744 ins_pipe(pipe_slow); 2745 %} 2746 2747 instruct mulF_reg(regF dst, regF src) %{ 2748 predicate((UseSSE>=1) && (UseAVX == 0)); 2749 match(Set dst (MulF dst src)); 2750 2751 format %{ "mulss $dst, $src" %} 2752 ins_cost(150); 2753 ins_encode %{ 2754 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2755 %} 2756 ins_pipe(pipe_slow); 2757 %} 2758 2759 instruct mulF_mem(regF dst, memory src) %{ 2760 predicate((UseSSE>=1) && (UseAVX == 0)); 2761 match(Set dst (MulF dst (LoadF src))); 2762 2763 format %{ "mulss $dst, $src" %} 2764 ins_cost(150); 2765 ins_encode %{ 2766 __ mulss($dst$$XMMRegister, $src$$Address); 2767 %} 2768 ins_pipe(pipe_slow); 2769 %} 2770 2771 instruct mulF_imm(regF dst, immF con) %{ 2772 predicate((UseSSE>=1) && (UseAVX == 0)); 2773 match(Set dst (MulF dst con)); 2774 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2775 ins_cost(150); 2776 ins_encode %{ 2777 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2778 %} 2779 ins_pipe(pipe_slow); 2780 %} 2781 2782 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2783 predicate(UseAVX > 0); 2784 match(Set dst (MulF src1 src2)); 2785 2786 format %{ "vmulss $dst, $src1, $src2" %} 2787 ins_cost(150); 2788 ins_encode %{ 2789 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2790 %} 2791 ins_pipe(pipe_slow); 2792 %} 2793 2794 instruct 
mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2795 predicate(UseAVX > 0); 2796 match(Set dst (MulF src1 (LoadF src2))); 2797 2798 format %{ "vmulss $dst, $src1, $src2" %} 2799 ins_cost(150); 2800 ins_encode %{ 2801 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2802 %} 2803 ins_pipe(pipe_slow); 2804 %} 2805 2806 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2807 predicate(UseAVX > 0); 2808 match(Set dst (MulF src con)); 2809 2810 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2811 ins_cost(150); 2812 ins_encode %{ 2813 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2814 %} 2815 ins_pipe(pipe_slow); 2816 %} 2817 2818 instruct mulD_reg(regD dst, regD src) %{ 2819 predicate((UseSSE>=2) && (UseAVX == 0)); 2820 match(Set dst (MulD dst src)); 2821 2822 format %{ "mulsd $dst, $src" %} 2823 ins_cost(150); 2824 ins_encode %{ 2825 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2826 %} 2827 ins_pipe(pipe_slow); 2828 %} 2829 2830 instruct mulD_mem(regD dst, memory src) %{ 2831 predicate((UseSSE>=2) && (UseAVX == 0)); 2832 match(Set dst (MulD dst (LoadD src))); 2833 2834 format %{ "mulsd $dst, $src" %} 2835 ins_cost(150); 2836 ins_encode %{ 2837 __ mulsd($dst$$XMMRegister, $src$$Address); 2838 %} 2839 ins_pipe(pipe_slow); 2840 %} 2841 2842 instruct mulD_imm(regD dst, immD con) %{ 2843 predicate((UseSSE>=2) && (UseAVX == 0)); 2844 match(Set dst (MulD dst con)); 2845 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2846 ins_cost(150); 2847 ins_encode %{ 2848 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2849 %} 2850 ins_pipe(pipe_slow); 2851 %} 2852 2853 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2854 predicate(UseAVX > 0); 2855 match(Set dst (MulD src1 src2)); 2856 2857 format %{ "vmulsd $dst, $src1, $src2" %} 2858 ins_cost(150); 2859 ins_encode %{ 2860 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2861 %} 2862 ins_pipe(pipe_slow); 2863 %} 2864 2865 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2866 predicate(UseAVX > 0); 2867 match(Set dst (MulD src1 (LoadD src2))); 2868 2869 format %{ "vmulsd $dst, $src1, $src2" %} 2870 ins_cost(150); 2871 ins_encode %{ 2872 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2873 %} 2874 ins_pipe(pipe_slow); 2875 %} 2876 2877 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2878 predicate(UseAVX > 0); 2879 match(Set dst (MulD src con)); 2880 2881 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2882 ins_cost(150); 2883 ins_encode %{ 2884 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2885 %} 2886 ins_pipe(pipe_slow); 2887 %} 2888 2889 instruct divF_reg(regF dst, regF src) %{ 2890 predicate((UseSSE>=1) && (UseAVX == 0)); 2891 match(Set dst (DivF dst src)); 2892 2893 format %{ "divss $dst, $src" %} 2894 ins_cost(150); 2895 ins_encode %{ 2896 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2897 %} 2898 ins_pipe(pipe_slow); 2899 %} 2900 2901 instruct divF_mem(regF dst, memory src) %{ 2902 predicate((UseSSE>=1) && (UseAVX == 0)); 2903 match(Set dst (DivF dst (LoadF src))); 2904 2905 format %{ "divss $dst, $src" %} 2906 ins_cost(150); 2907 ins_encode %{ 2908 __ divss($dst$$XMMRegister, $src$$Address); 2909 %} 2910 ins_pipe(pipe_slow); 2911 %} 2912 2913 instruct divF_imm(regF dst, immF con) %{ 2914 predicate((UseSSE>=1) && (UseAVX == 0)); 2915 match(Set dst (DivF dst con)); 2916 format 
%{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2917 ins_cost(150); 2918 ins_encode %{ 2919 __ divss($dst$$XMMRegister, $constantaddress($con)); 2920 %} 2921 ins_pipe(pipe_slow); 2922 %} 2923 2924 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2925 predicate(UseAVX > 0); 2926 match(Set dst (DivF src1 src2)); 2927 2928 format %{ "vdivss $dst, $src1, $src2" %} 2929 ins_cost(150); 2930 ins_encode %{ 2931 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2932 %} 2933 ins_pipe(pipe_slow); 2934 %} 2935 2936 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2937 predicate(UseAVX > 0); 2938 match(Set dst (DivF src1 (LoadF src2))); 2939 2940 format %{ "vdivss $dst, $src1, $src2" %} 2941 ins_cost(150); 2942 ins_encode %{ 2943 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2944 %} 2945 ins_pipe(pipe_slow); 2946 %} 2947 2948 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2949 predicate(UseAVX > 0); 2950 match(Set dst (DivF src con)); 2951 2952 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2953 ins_cost(150); 2954 ins_encode %{ 2955 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2956 %} 2957 ins_pipe(pipe_slow); 2958 %} 2959 2960 instruct divD_reg(regD dst, regD src) %{ 2961 predicate((UseSSE>=2) && (UseAVX == 0)); 2962 match(Set dst (DivD dst src)); 2963 2964 format %{ "divsd $dst, $src" %} 2965 ins_cost(150); 2966 ins_encode %{ 2967 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2968 %} 2969 ins_pipe(pipe_slow); 2970 %} 2971 2972 instruct divD_mem(regD dst, memory src) %{ 2973 predicate((UseSSE>=2) && (UseAVX == 0)); 2974 match(Set dst (DivD dst (LoadD src))); 2975 2976 format %{ "divsd $dst, $src" %} 2977 ins_cost(150); 2978 ins_encode %{ 2979 __ divsd($dst$$XMMRegister, $src$$Address); 2980 %} 2981 ins_pipe(pipe_slow); 2982 %} 2983 2984 instruct divD_imm(regD dst, immD con) %{ 2985 predicate((UseSSE>=2) && (UseAVX == 0)); 2986 match(Set dst (DivD dst con)); 2987 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2988 ins_cost(150); 2989 ins_encode %{ 2990 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2991 %} 2992 ins_pipe(pipe_slow); 2993 %} 2994 2995 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2996 predicate(UseAVX > 0); 2997 match(Set dst (DivD src1 src2)); 2998 2999 format %{ "vdivsd $dst, $src1, $src2" %} 3000 ins_cost(150); 3001 ins_encode %{ 3002 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3003 %} 3004 ins_pipe(pipe_slow); 3005 %} 3006 3007 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3008 predicate(UseAVX > 0); 3009 match(Set dst (DivD src1 (LoadD src2))); 3010 3011 format %{ "vdivsd $dst, $src1, $src2" %} 3012 ins_cost(150); 3013 ins_encode %{ 3014 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3015 %} 3016 ins_pipe(pipe_slow); 3017 %} 3018 3019 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3020 predicate(UseAVX > 0); 3021 match(Set dst (DivD src con)); 3022 3023 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3024 ins_cost(150); 3025 ins_encode %{ 3026 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3027 %} 3028 ins_pipe(pipe_slow); 3029 %} 3030 3031 instruct absF_reg(regF dst) %{ 3032 predicate((UseSSE>=1) && (UseAVX == 0)); 3033 match(Set dst (AbsF dst)); 3034 ins_cost(150); 3035 format %{ "andps $dst, [0x7fffffff]\t# abs float 
by sign masking" %} 3036 ins_encode %{ 3037 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3038 %} 3039 ins_pipe(pipe_slow); 3040 %} 3041 3042 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3043 predicate(UseAVX > 0); 3044 match(Set dst (AbsF src)); 3045 ins_cost(150); 3046 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3047 ins_encode %{ 3048 int vlen_enc = Assembler::AVX_128bit; 3049 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3050 ExternalAddress(float_signmask()), vlen_enc); 3051 %} 3052 ins_pipe(pipe_slow); 3053 %} 3054 3055 instruct absD_reg(regD dst) %{ 3056 predicate((UseSSE>=2) && (UseAVX == 0)); 3057 match(Set dst (AbsD dst)); 3058 ins_cost(150); 3059 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3060 "# abs double by sign masking" %} 3061 ins_encode %{ 3062 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3063 %} 3064 ins_pipe(pipe_slow); 3065 %} 3066 3067 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3068 predicate(UseAVX > 0); 3069 match(Set dst (AbsD src)); 3070 ins_cost(150); 3071 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3072 "# abs double by sign masking" %} 3073 ins_encode %{ 3074 int vlen_enc = Assembler::AVX_128bit; 3075 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3076 ExternalAddress(double_signmask()), vlen_enc); 3077 %} 3078 ins_pipe(pipe_slow); 3079 %} 3080 3081 instruct negF_reg(regF dst) %{ 3082 predicate((UseSSE>=1) && (UseAVX == 0)); 3083 match(Set dst (NegF dst)); 3084 ins_cost(150); 3085 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3086 ins_encode %{ 3087 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3088 %} 3089 ins_pipe(pipe_slow); 3090 %} 3091 3092 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3093 predicate(UseAVX > 0); 3094 match(Set dst (NegF src)); 3095 ins_cost(150); 3096 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3097 ins_encode %{ 3098 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3099 ExternalAddress(float_signflip())); 3100 %} 3101 ins_pipe(pipe_slow); 3102 %} 3103 3104 instruct negD_reg(regD dst) %{ 3105 predicate((UseSSE>=2) && (UseAVX == 0)); 3106 match(Set dst (NegD dst)); 3107 ins_cost(150); 3108 format %{ "xorpd $dst, [0x8000000000000000]\t" 3109 "# neg double by sign flipping" %} 3110 ins_encode %{ 3111 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3112 %} 3113 ins_pipe(pipe_slow); 3114 %} 3115 3116 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3117 predicate(UseAVX > 0); 3118 match(Set dst (NegD src)); 3119 ins_cost(150); 3120 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3121 "# neg double by sign flipping" %} 3122 ins_encode %{ 3123 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3124 ExternalAddress(double_signflip())); 3125 %} 3126 ins_pipe(pipe_slow); 3127 %} 3128 3129 instruct sqrtF_reg(regF dst, regF src) %{ 3130 predicate(UseSSE>=1); 3131 match(Set dst (SqrtF src)); 3132 3133 format %{ "sqrtss $dst, $src" %} 3134 ins_cost(150); 3135 ins_encode %{ 3136 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 3137 %} 3138 ins_pipe(pipe_slow); 3139 %} 3140 3141 instruct sqrtF_mem(regF dst, memory src) %{ 3142 predicate(UseSSE>=1); 3143 match(Set dst (SqrtF (LoadF src))); 3144 3145 format %{ "sqrtss $dst, $src" %} 3146 ins_cost(150); 3147 ins_encode %{ 3148 __ sqrtss($dst$$XMMRegister, $src$$Address); 3149 %} 3150 ins_pipe(pipe_slow); 3151 %} 3152 3153 instruct sqrtF_imm(regF dst, immF con) %{ 3154 predicate(UseSSE>=1); 3155 match(Set 
dst (SqrtF con)); 3156 3157 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3158 ins_cost(150); 3159 ins_encode %{ 3160 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 3161 %} 3162 ins_pipe(pipe_slow); 3163 %} 3164 3165 instruct sqrtD_reg(regD dst, regD src) %{ 3166 predicate(UseSSE>=2); 3167 match(Set dst (SqrtD src)); 3168 3169 format %{ "sqrtsd $dst, $src" %} 3170 ins_cost(150); 3171 ins_encode %{ 3172 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 3173 %} 3174 ins_pipe(pipe_slow); 3175 %} 3176 3177 instruct sqrtD_mem(regD dst, memory src) %{ 3178 predicate(UseSSE>=2); 3179 match(Set dst (SqrtD (LoadD src))); 3180 3181 format %{ "sqrtsd $dst, $src" %} 3182 ins_cost(150); 3183 ins_encode %{ 3184 __ sqrtsd($dst$$XMMRegister, $src$$Address); 3185 %} 3186 ins_pipe(pipe_slow); 3187 %} 3188 3189 instruct sqrtD_imm(regD dst, immD con) %{ 3190 predicate(UseSSE>=2); 3191 match(Set dst (SqrtD con)); 3192 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3193 ins_cost(150); 3194 ins_encode %{ 3195 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 3196 %} 3197 ins_pipe(pipe_slow); 3198 %} 3199 3200 // ---------------------------------------- VectorReinterpret ------------------------------------ 3201 3202 instruct reinterpret(vec dst) %{ 3203 predicate(vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1))); // dst == src 3204 match(Set dst (VectorReinterpret dst)); 3205 ins_cost(125); 3206 format %{ "vector_reinterpret $dst\t!" %} 3207 ins_encode %{ 3208 // empty 3209 %} 3210 ins_pipe( pipe_slow ); 3211 %} 3212 3213 instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ 3214 predicate(UseAVX == 0 && 3215 (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst 3216 match(Set dst (VectorReinterpret src)); 3217 ins_cost(125); 3218 effect(TEMP dst, TEMP scratch); 3219 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3220 ins_encode %{ 3221 assert(vector_length_in_bytes(this) <= 16, "required"); 3222 assert(vector_length_in_bytes(this, $src) <= 8, "required"); 3223 3224 int src_vlen_in_bytes = vector_length_in_bytes(this, $src); 3225 if (src_vlen_in_bytes == 4) { 3226 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); 3227 } else { 3228 assert(src_vlen_in_bytes == 8, ""); 3229 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); 3230 } 3231 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3232 %} 3233 ins_pipe( pipe_slow ); 3234 %} 3235 3236 instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ 3237 predicate(UseAVX > 0 && 3238 (vector_length_in_bytes(n->in(1)) == 4) && // src 3239 (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst 3240 match(Set dst (VectorReinterpret src)); 3241 ins_cost(125); 3242 effect(TEMP scratch); 3243 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3244 ins_encode %{ 3245 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register); 3246 %} 3247 ins_pipe( pipe_slow ); 3248 %} 3249 3250 3251 instruct vreinterpret_expand(legVec dst, vec src) %{ 3252 predicate(UseAVX > 0 && 3253 (vector_length_in_bytes(n->in(1)) > 4) && // src 3254 (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst 3255 match(Set dst (VectorReinterpret src)); 3256 ins_cost(125); 3257 format %{ "vector_reinterpret_expand $dst,$src\t!" 
%} 3258 ins_encode %{ 3259 switch (vector_length_in_bytes(this, $src)) { 3260 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3261 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3262 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3263 default: ShouldNotReachHere(); 3264 } 3265 %} 3266 ins_pipe( pipe_slow ); 3267 %} 3268 3269 instruct reinterpret_shrink(vec dst, legVec src) %{ 3270 predicate(vector_length_in_bytes(n->in(1)) > vector_length_in_bytes(n)); // src > dst 3271 match(Set dst (VectorReinterpret src)); 3272 ins_cost(125); 3273 format %{ "vector_reinterpret_shrink $dst,$src\t!" %} 3274 ins_encode %{ 3275 switch (vector_length_in_bytes(this)) { 3276 case 4: __ movflt ($dst$$XMMRegister, $src$$XMMRegister); break; 3277 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3278 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3279 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3280 default: ShouldNotReachHere(); 3281 } 3282 %} 3283 ins_pipe( pipe_slow ); 3284 %} 3285 3286 // ---------------------------------------------------------------------------------------------------- 3287 3288 #ifdef _LP64 3289 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3290 match(Set dst (RoundDoubleMode src rmode)); 3291 format %{ "roundsd $dst,$src" %} 3292 ins_cost(150); 3293 ins_encode %{ 3294 assert(UseSSE >= 4, "required"); 3295 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3296 %} 3297 ins_pipe(pipe_slow); 3298 %} 3299 3300 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3301 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3302 format %{ "roundsd $dst,$src" %} 3303 ins_cost(150); 3304 ins_encode %{ 3305 assert(UseSSE >= 4, "required"); 3306 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3307 %} 3308 ins_pipe(pipe_slow); 3309 %} 3310 3311 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 3312 match(Set dst (RoundDoubleMode con rmode)); 3313 effect(TEMP scratch_reg); 3314 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3315 ins_cost(150); 3316 ins_encode %{ 3317 assert(UseSSE >= 4, "required"); 3318 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 3319 %} 3320 ins_pipe(pipe_slow); 3321 %} 3322 3323 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3324 predicate(vector_length(n) < 8); 3325 match(Set dst (RoundDoubleModeV src rmode)); 3326 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3327 ins_encode %{ 3328 assert(UseAVX > 0, "required"); 3329 int vlen_enc = vector_length_encoding(this); 3330 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3331 %} 3332 ins_pipe( pipe_slow ); 3333 %} 3334 3335 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3336 predicate(vector_length(n) == 8); 3337 match(Set dst (RoundDoubleModeV src rmode)); 3338 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3339 ins_encode %{ 3340 assert(UseAVX > 2, "required"); 3341 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3342 %} 3343 ins_pipe( pipe_slow ); 3344 %} 3345 3346 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3347 predicate(vector_length(n) < 8); 3348 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3349 format %{ "vroundpd $dst, $mem, $rmode\t! 
round packedD" %} 3350 ins_encode %{ 3351 assert(UseAVX > 0, "required"); 3352 int vlen_enc = vector_length_encoding(this); 3353 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3354 %} 3355 ins_pipe( pipe_slow ); 3356 %} 3357 3358 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3359 predicate(vector_length(n) == 8); 3360 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3361 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3362 ins_encode %{ 3363 assert(UseAVX > 2, "required"); 3364 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3365 %} 3366 ins_pipe( pipe_slow ); 3367 %} 3368 #endif // _LP64 3369 3370 instruct onspinwait() %{ 3371 match(OnSpinWait); 3372 ins_cost(200); 3373 3374 format %{ 3375 $$template 3376 $$emit$$"pause\t! membar_onspinwait" 3377 %} 3378 ins_encode %{ 3379 __ pause(); 3380 %} 3381 ins_pipe(pipe_slow); 3382 %} 3383 3384 // a * b + c 3385 instruct fmaD_reg(regD a, regD b, regD c) %{ 3386 predicate(UseFMA); 3387 match(Set c (FmaD c (Binary a b))); 3388 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3389 ins_cost(150); 3390 ins_encode %{ 3391 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3392 %} 3393 ins_pipe( pipe_slow ); 3394 %} 3395 3396 // a * b + c 3397 instruct fmaF_reg(regF a, regF b, regF c) %{ 3398 predicate(UseFMA); 3399 match(Set c (FmaF c (Binary a b))); 3400 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3401 ins_cost(150); 3402 ins_encode %{ 3403 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3404 %} 3405 ins_pipe( pipe_slow ); 3406 %} 3407 3408 // ====================VECTOR INSTRUCTIONS===================================== 3409 3410 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3411 instruct MoveVec2Leg(legVec dst, vec src) %{ 3412 match(Set dst src); 3413 format %{ "" %} 3414 ins_encode %{ 3415 ShouldNotReachHere(); 3416 %} 3417 ins_pipe( fpu_reg_reg ); 3418 %} 3419 3420 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3421 match(Set dst src); 3422 format %{ "" %} 3423 ins_encode %{ 3424 ShouldNotReachHere(); 3425 %} 3426 ins_pipe( fpu_reg_reg ); 3427 %} 3428 3429 // ============================================================================ 3430 3431 // Load vectors generic operand pattern 3432 instruct loadV(vec dst, memory mem) %{ 3433 match(Set dst (LoadVector mem)); 3434 ins_cost(125); 3435 format %{ "load_vector $dst,$mem" %} 3436 ins_encode %{ 3437 switch (vector_length_in_bytes(this)) { 3438 case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break; 3439 case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break; 3440 case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break; 3441 case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break; 3442 case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break; 3443 default: ShouldNotReachHere(); 3444 } 3445 %} 3446 ins_pipe( pipe_slow ); 3447 %} 3448 3449 // Store vectors generic operand pattern. 
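// As with loadV above, the move instruction is selected purely by the vector size in bytes:
// 4 -> movdl, 8 -> movq, 16 -> movdqu, 32 -> vmovdqu, 64 -> evmovdqul (EVEX, 512-bit).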
3450 instruct storeV(memory mem, vec src) %{ 3451 match(Set mem (StoreVector mem src)); 3452 ins_cost(145); 3453 format %{ "store_vector $mem,$src\n\t" %} 3454 ins_encode %{ 3455 switch (vector_length_in_bytes(this, $src)) { 3456 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3457 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3458 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3459 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3460 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3461 default: ShouldNotReachHere(); 3462 } 3463 %} 3464 ins_pipe( pipe_slow ); 3465 %} 3466 3467 // ---------------------------------------- Gather ------------------------------------ 3468 3469 // Gather INT, LONG, FLOAT, DOUBLE 3470 3471 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 3472 predicate(vector_length_in_bytes(n) <= 32); 3473 match(Set dst (LoadVectorGather mem idx)); 3474 effect(TEMP dst, TEMP tmp, TEMP mask); 3475 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} 3476 ins_encode %{ 3477 assert(UseAVX >= 2, "sanity"); 3478 3479 int vlen_enc = vector_length_encoding(this); 3480 BasicType elem_bt = vector_element_basic_type(this); 3481 3482 assert(vector_length_in_bytes(this) >= 16, "sanity"); 3483 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3484 3485 if (vlen_enc == Assembler::AVX_128bit) { 3486 __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3487 } else { 3488 __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3489 } 3490 __ lea($tmp$$Register, $mem$$Address); 3491 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3492 %} 3493 ins_pipe( pipe_slow ); 3494 %} 3495 3496 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{ 3497 predicate(vector_length_in_bytes(n) == 64); 3498 match(Set dst (LoadVectorGather mem idx)); 3499 effect(TEMP dst, TEMP tmp); 3500 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and k2 as TEMP" %} 3501 ins_encode %{ 3502 assert(UseAVX > 2, "sanity"); 3503 3504 int vlen_enc = vector_length_encoding(this); 3505 BasicType elem_bt = vector_element_basic_type(this); 3506 3507 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3508 3509 KRegister ktmp = k2; 3510 __ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 3511 __ lea($tmp$$Register, $mem$$Address); 3512 __ evgather(elem_bt, $dst$$XMMRegister, ktmp, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 3513 %} 3514 ins_pipe( pipe_slow ); 3515 %} 3516 3517 // ====================Scatter======================================= 3518 3519 // Scatter INT, LONG, FLOAT, DOUBLE 3520 3521 instruct scatter(memory mem, vec src, vec idx, rRegP tmp) %{ 3522 match(Set mem (StoreVectorScatter mem (Binary src idx))); 3523 effect(TEMP tmp); 3524 format %{ "store_vector_scatter $mem, $idx, $src\t! 
using k2 and $tmp as TEMP" %} 3525 ins_encode %{ 3526 assert(UseAVX > 2, "sanity"); 3527 3528 int vlen_enc = vector_length_encoding(this, $src); 3529 BasicType elem_bt = vector_element_basic_type(this, $src); 3530 3531 assert(vector_length_in_bytes(this, $src) >= 16, "sanity"); 3532 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3533 3534 KRegister ktmp = k2; 3535 __ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 3536 __ lea($tmp$$Register, $mem$$Address); 3537 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, ktmp, $src$$XMMRegister, vlen_enc); 3538 %} 3539 ins_pipe( pipe_slow ); 3540 %} 3541 3542 // ====================REPLICATE======================================= 3543 3544 // Replicate byte scalar to be vector 3545 instruct ReplB_reg(vec dst, rRegI src) %{ 3546 match(Set dst (ReplicateB src)); 3547 format %{ "replicateB $dst,$src" %} 3548 ins_encode %{ 3549 uint vlen = vector_length(this); 3550 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3551 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 3552 int vlen_enc = vector_length_encoding(this); 3553 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 3554 } else { 3555 __ movdl($dst$$XMMRegister, $src$$Register); 3556 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3557 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3558 if (vlen >= 16) { 3559 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3560 if (vlen >= 32) { 3561 assert(vlen == 32, "sanity"); 3562 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3563 } 3564 } 3565 } 3566 %} 3567 ins_pipe( pipe_slow ); 3568 %} 3569 3570 instruct ReplB_mem(vec dst, memory mem) %{ 3571 predicate(VM_Version::supports_avx2()); 3572 match(Set dst (ReplicateB (LoadB mem))); 3573 format %{ "replicateB $dst,$mem" %} 3574 ins_encode %{ 3575 int vlen_enc = vector_length_encoding(this); 3576 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 3577 %} 3578 ins_pipe( pipe_slow ); 3579 %} 3580 3581 instruct ReplB_imm(vec dst, immI con) %{ 3582 match(Set dst (ReplicateB con)); 3583 format %{ "replicateB $dst,$con" %} 3584 ins_encode %{ 3585 uint vlen = vector_length(this); 3586 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1)); 3587 if (vlen == 4) { 3588 __ movdl($dst$$XMMRegister, const_addr); 3589 } else { 3590 __ movq($dst$$XMMRegister, const_addr); 3591 if (vlen >= 16) { 3592 if (VM_Version::supports_avx2()) { 3593 int vlen_enc = vector_length_encoding(this); 3594 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3595 } else { 3596 assert(vlen == 16, "sanity"); 3597 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3598 } 3599 } 3600 } 3601 %} 3602 ins_pipe( pipe_slow ); 3603 %} 3604 3605 // Replicate byte scalar zero to be vector 3606 instruct ReplB_zero(vec dst, immI_0 zero) %{ 3607 match(Set dst (ReplicateB zero)); 3608 format %{ "replicateB $dst,$zero" %} 3609 ins_encode %{ 3610 uint vlen = vector_length(this); 3611 if (vlen <= 16) { 3612 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3613 } else { 3614 // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ). 
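      // The other Repl*_zero rules below follow the same pattern: a 128-bit xor
      // (pxor/xorps/xorpd) when the vector fits in 16 bytes, and vpxor with the proper
      // length encoding otherwise.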
3615 int vlen_enc = vector_length_encoding(this); 3616 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3617 } 3618 %} 3619 ins_pipe( fpu_reg_reg ); 3620 %} 3621 3622 // ====================ReplicateS======================================= 3623 3624 instruct ReplS_reg(vec dst, rRegI src) %{ 3625 match(Set dst (ReplicateS src)); 3626 format %{ "replicateS $dst,$src" %} 3627 ins_encode %{ 3628 uint vlen = vector_length(this); 3629 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3630 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 3631 int vlen_enc = vector_length_encoding(this); 3632 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 3633 } else { 3634 __ movdl($dst$$XMMRegister, $src$$Register); 3635 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3636 if (vlen >= 8) { 3637 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3638 if (vlen >= 16) { 3639 assert(vlen == 16, "sanity"); 3640 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3641 } 3642 } 3643 } 3644 %} 3645 ins_pipe( pipe_slow ); 3646 %} 3647 3648 instruct ReplS_mem(vec dst, memory mem) %{ 3649 predicate(VM_Version::supports_avx2()); 3650 match(Set dst (ReplicateS (LoadS mem))); 3651 format %{ "replicateS $dst,$mem" %} 3652 ins_encode %{ 3653 int vlen_enc = vector_length_encoding(this); 3654 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 3655 %} 3656 ins_pipe( pipe_slow ); 3657 %} 3658 3659 instruct ReplS_imm(vec dst, immI con) %{ 3660 match(Set dst (ReplicateS con)); 3661 format %{ "replicateS $dst,$con" %} 3662 ins_encode %{ 3663 uint vlen = vector_length(this); 3664 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2)); 3665 if (vlen == 2) { 3666 __ movdl($dst$$XMMRegister, const_addr); 3667 } else { 3668 __ movq($dst$$XMMRegister, const_addr); 3669 if (vlen >= 8) { 3670 if (VM_Version::supports_avx2()) { 3671 int vlen_enc = vector_length_encoding(this); 3672 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3673 } else { 3674 assert(vlen == 8, "sanity"); 3675 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3676 } 3677 } 3678 } 3679 %} 3680 ins_pipe( fpu_reg_reg ); 3681 %} 3682 3683 instruct ReplS_zero(vec dst, immI_0 zero) %{ 3684 match(Set dst (ReplicateS zero)); 3685 format %{ "replicateS $dst,$zero" %} 3686 ins_encode %{ 3687 uint vlen = vector_length(this); 3688 if (vlen <= 8) { 3689 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3690 } else { 3691 int vlen_enc = vector_length_encoding(this); 3692 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3693 } 3694 %} 3695 ins_pipe( fpu_reg_reg ); 3696 %} 3697 3698 // ====================ReplicateI======================================= 3699 3700 instruct ReplI_reg(vec dst, rRegI src) %{ 3701 match(Set dst (ReplicateI src)); 3702 format %{ "replicateI $dst,$src" %} 3703 ins_encode %{ 3704 uint vlen = vector_length(this); 3705 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3706 int vlen_enc = vector_length_encoding(this); 3707 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 3708 } else { 3709 __ movdl($dst$$XMMRegister, $src$$Register); 3710 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3711 if (vlen >= 8) { 3712 assert(vlen == 8, "sanity"); 3713 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3714 } 3715 } 3716 %} 3717 ins_pipe( pipe_slow ); 3718 %} 3719 3720 instruct 
ReplI_mem(vec dst, memory mem) %{ 3721 match(Set dst (ReplicateI (LoadI mem))); 3722 format %{ "replicateI $dst,$mem" %} 3723 ins_encode %{ 3724 uint vlen = vector_length(this); 3725 if (vlen <= 4) { 3726 __ movdl($dst$$XMMRegister, $mem$$Address); 3727 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3728 } else { 3729 assert(VM_Version::supports_avx2(), "sanity"); 3730 int vlen_enc = vector_length_encoding(this); 3731 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 3732 } 3733 %} 3734 ins_pipe( pipe_slow ); 3735 %} 3736 3737 instruct ReplI_imm(vec dst, immI con) %{ 3738 match(Set dst (ReplicateI con)); 3739 format %{ "replicateI $dst,$con" %} 3740 ins_encode %{ 3741 uint vlen = vector_length(this); 3742 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4)); 3743 if (vlen <= 4) { 3744 __ movq($dst$$XMMRegister, const_addr); 3745 if (vlen == 4) { 3746 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3747 } 3748 } else { 3749 assert(VM_Version::supports_avx2(), "sanity"); 3750 int vlen_enc = vector_length_encoding(this); 3751 __ movq($dst$$XMMRegister, const_addr); 3752 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3753 } 3754 %} 3755 ins_pipe( pipe_slow ); 3756 %} 3757 3758 // Replicate integer (4 byte) scalar zero to be vector 3759 instruct ReplI_zero(vec dst, immI_0 zero) %{ 3760 match(Set dst (ReplicateI zero)); 3761 format %{ "replicateI $dst,$zero" %} 3762 ins_encode %{ 3763 uint vlen = vector_length(this); 3764 if (vlen <= 4) { 3765 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3766 } else { 3767 int vlen_enc = vector_length_encoding(this); 3768 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3769 } 3770 %} 3771 ins_pipe( fpu_reg_reg ); 3772 %} 3773 3774 // ====================ReplicateL======================================= 3775 3776 #ifdef _LP64 3777 // Replicate long (8 byte) scalar to be vector 3778 instruct ReplL_reg(vec dst, rRegL src) %{ 3779 match(Set dst (ReplicateL src)); 3780 format %{ "replicateL $dst,$src" %} 3781 ins_encode %{ 3782 uint vlen = vector_length(this); 3783 if (vlen == 2) { 3784 __ movdq($dst$$XMMRegister, $src$$Register); 3785 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3786 } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3787 int vlen_enc = vector_length_encoding(this); 3788 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 3789 } else { 3790 assert(vlen == 4, "sanity"); 3791 __ movdq($dst$$XMMRegister, $src$$Register); 3792 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3793 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3794 } 3795 %} 3796 ins_pipe( pipe_slow ); 3797 %} 3798 #else // _LP64 3799 // Replicate long (8 byte) scalar to be vector 3800 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 3801 predicate(vector_length(n) <= 4); 3802 match(Set dst (ReplicateL src)); 3803 effect(TEMP dst, USE src, TEMP tmp); 3804 format %{ "replicateL $dst,$src" %} 3805 ins_encode %{ 3806 uint vlen = vector_length(this); 3807 if (vlen == 2) { 3808 __ movdl($dst$$XMMRegister, $src$$Register); 3809 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3810 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3811 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3812 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3813 int vlen_enc = Assembler::AVX_256bit; 3814 __ movdl($dst$$XMMRegister, $src$$Register); 3815 __ movdl($tmp$$XMMRegister, 
HIGH_FROM_LOW($src$$Register)); 3816 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3817 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3818 } else { 3819 __ movdl($dst$$XMMRegister, $src$$Register); 3820 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3821 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3822 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3823 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3824 } 3825 %} 3826 ins_pipe( pipe_slow ); 3827 %} 3828 3829 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 3830 predicate(vector_length(n) == 8); 3831 match(Set dst (ReplicateL src)); 3832 effect(TEMP dst, USE src, TEMP tmp); 3833 format %{ "replicateL $dst,$src" %} 3834 ins_encode %{ 3835 if (VM_Version::supports_avx512vl()) { 3836 __ movdl($dst$$XMMRegister, $src$$Register); 3837 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3838 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3839 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3840 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3841 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3842 } else { 3843 int vlen_enc = Assembler::AVX_512bit; 3844 __ movdl($dst$$XMMRegister, $src$$Register); 3845 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3846 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3847 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3848 } 3849 %} 3850 ins_pipe( pipe_slow ); 3851 %} 3852 #endif // _LP64 3853 3854 instruct ReplL_mem(vec dst, memory mem) %{ 3855 match(Set dst (ReplicateL (LoadL mem))); 3856 format %{ "replicateL $dst,$mem" %} 3857 ins_encode %{ 3858 uint vlen = vector_length(this); 3859 if (vlen == 2) { 3860 __ movq($dst$$XMMRegister, $mem$$Address); 3861 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3862 } else { 3863 assert(VM_Version::supports_avx2(), "sanity"); 3864 int vlen_enc = vector_length_encoding(this); 3865 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 3866 } 3867 %} 3868 ins_pipe( pipe_slow ); 3869 %} 3870 3871 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
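// The encoding below emits the 64-bit value into the constant table via $constantaddress($con),
// movq-loads it into the low lane, and widens it with punpcklqdq (2 lanes) or vpbroadcastq
// (AVX2 and wider vectors).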
3872 instruct ReplL_imm(vec dst, immL con) %{ 3873 match(Set dst (ReplicateL con)); 3874 format %{ "replicateL $dst,$con" %} 3875 ins_encode %{ 3876 uint vlen = vector_length(this); 3877 InternalAddress const_addr = $constantaddress($con); 3878 if (vlen == 2) { 3879 __ movq($dst$$XMMRegister, const_addr); 3880 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3881 } else { 3882 assert(VM_Version::supports_avx2(), "sanity"); 3883 int vlen_enc = vector_length_encoding(this); 3884 __ movq($dst$$XMMRegister, const_addr); 3885 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3886 } 3887 %} 3888 ins_pipe( pipe_slow ); 3889 %} 3890 3891 instruct ReplL_zero(vec dst, immL0 zero) %{ 3892 match(Set dst (ReplicateL zero)); 3893 format %{ "replicateL $dst,$zero" %} 3894 ins_encode %{ 3895 int vlen = vector_length(this); 3896 if (vlen == 2) { 3897 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3898 } else { 3899 int vlen_enc = vector_length_encoding(this); 3900 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3901 } 3902 %} 3903 ins_pipe( fpu_reg_reg ); 3904 %} 3905 3906 // ====================ReplicateF======================================= 3907 3908 instruct ReplF_reg(vec dst, vlRegF src) %{ 3909 match(Set dst (ReplicateF src)); 3910 format %{ "replicateF $dst,$src" %} 3911 ins_encode %{ 3912 uint vlen = vector_length(this); 3913 if (vlen <= 4) { 3914 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3915 } else if (VM_Version::supports_avx2()) { 3916 int vlen_enc = vector_length_encoding(this); 3917 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 3918 } else { 3919 assert(vlen == 8, "sanity"); 3920 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3921 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3922 } 3923 %} 3924 ins_pipe( pipe_slow ); 3925 %} 3926 3927 instruct ReplF_mem(vec dst, memory mem) %{ 3928 match(Set dst (ReplicateF (LoadF mem))); 3929 format %{ "replicateF $dst,$mem" %} 3930 ins_encode %{ 3931 uint vlen = vector_length(this); 3932 if (vlen <= 4) { 3933 __ movdl($dst$$XMMRegister, $mem$$Address); 3934 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3935 } else { 3936 assert(VM_Version::supports_avx(), "sanity"); 3937 int vlen_enc = vector_length_encoding(this); 3938 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 3939 } 3940 %} 3941 ins_pipe( pipe_slow ); 3942 %} 3943 3944 instruct ReplF_zero(vec dst, immF0 zero) %{ 3945 match(Set dst (ReplicateF zero)); 3946 format %{ "replicateF $dst,$zero" %} 3947 ins_encode %{ 3948 uint vlen = vector_length(this); 3949 if (vlen <= 4) { 3950 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3951 } else { 3952 int vlen_enc = vector_length_encoding(this); 3953 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 3954 } 3955 %} 3956 ins_pipe( fpu_reg_reg ); 3957 %} 3958 3959 // ====================ReplicateD======================================= 3960 3961 // Replicate double (8 bytes) scalar to be vector 3962 instruct ReplD_reg(vec dst, vlRegD src) %{ 3963 match(Set dst (ReplicateD src)); 3964 format %{ "replicateD $dst,$src" %} 3965 ins_encode %{ 3966 uint vlen = vector_length(this); 3967 if (vlen == 2) { 3968 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3969 } else if (VM_Version::supports_avx2()) { 3970 int vlen_enc = vector_length_encoding(this); 3971 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant 
requires AVX2 3972 } else { 3973 assert(vlen == 4, "sanity"); 3974 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3975 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3976 } 3977 %} 3978 ins_pipe( pipe_slow ); 3979 %} 3980 3981 instruct ReplD_mem(vec dst, memory mem) %{ 3982 match(Set dst (ReplicateD (LoadD mem))); 3983 format %{ "replicateD $dst,$mem" %} 3984 ins_encode %{ 3985 uint vlen = vector_length(this); 3986 if (vlen == 2) { 3987 __ movq($dst$$XMMRegister, $mem$$Address); 3988 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); 3989 } else { 3990 assert(VM_Version::supports_avx(), "sanity"); 3991 int vlen_enc = vector_length_encoding(this); 3992 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 3993 } 3994 %} 3995 ins_pipe( pipe_slow ); 3996 %} 3997 3998 instruct ReplD_zero(vec dst, immD0 zero) %{ 3999 match(Set dst (ReplicateD zero)); 4000 format %{ "replicateD $dst,$zero" %} 4001 ins_encode %{ 4002 uint vlen = vector_length(this); 4003 if (vlen == 2) { 4004 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 4005 } else { 4006 int vlen_enc = vector_length_encoding(this); 4007 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4008 } 4009 %} 4010 ins_pipe( fpu_reg_reg ); 4011 %} 4012 4013 // ====================VECTOR INSERT======================================= 4014 4015 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4016 predicate(vector_length_in_bytes(n) >= 8 && 4017 vector_length_in_bytes(n) <= 16); 4018 match(Set dst (VectorInsert (Binary dst val) idx)); 4019 format %{ "vector_insert $dst,$val,$idx" %} 4020 ins_encode %{ 4021 assert(UseSSE >= 4, "required"); 4022 4023 BasicType elem_bt = vector_element_basic_type(this); 4024 4025 assert(is_integral_type(elem_bt), ""); 4026 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4027 4028 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4029 %} 4030 ins_pipe( pipe_slow ); 4031 %} 4032 4033 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4034 predicate(vector_length_in_bytes(n) == 32); 4035 match(Set dst (VectorInsert (Binary src val) idx)); 4036 effect(TEMP vtmp); 4037 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4038 ins_encode %{ 4039 int vlen_enc = Assembler::AVX_256bit; 4040 BasicType elem_bt = vector_element_basic_type(this); 4041 int elem_per_lane = 16/type2aelembytes(elem_bt); 4042 int log2epr = log2(elem_per_lane); 4043 4044 assert(is_integral_type(elem_bt), "sanity"); 4045 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4046 4047 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4048 uint y_idx = ($idx$$constant >> log2epr) & 1; 4049 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4050 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4051 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4052 %} 4053 ins_pipe( pipe_slow ); 4054 %} 4055 4056 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4057 predicate(vector_length_in_bytes(n) == 64); 4058 match(Set dst (VectorInsert (Binary src val) idx)); 4059 effect(TEMP vtmp); 4060 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4061 ins_encode %{ 4062 assert(UseAVX > 2, "sanity"); 4063 4064 BasicType elem_bt = vector_element_basic_type(this); 4065 int elem_per_lane = 16/type2aelembytes(elem_bt); 4066 int log2epr = log2(elem_per_lane); 4067 4068 
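    // The element index is split into a 128-bit lane number (y_idx) and a position within
    // that lane (x_idx). For example, with T_INT elements (4 per 128-bit lane, log2epr == 2)
    // an index of 13 selects lane y_idx = 3, position x_idx = 1.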
assert(is_integral_type(elem_bt), ""); 4069 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4070 4071 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4072 uint y_idx = ($idx$$constant >> log2epr) & 3; 4073 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4074 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4075 __ vinserti32x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4076 %} 4077 ins_pipe( pipe_slow ); 4078 %} 4079 4080 #ifdef _LP64 4081 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4082 predicate(vector_length(n) == 2); 4083 match(Set dst (VectorInsert (Binary dst val) idx)); 4084 format %{ "vector_insert $dst,$val,$idx" %} 4085 ins_encode %{ 4086 assert(UseSSE >= 4, "required"); 4087 assert(vector_element_basic_type(this) == T_LONG, ""); 4088 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4089 4090 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4091 %} 4092 ins_pipe( pipe_slow ); 4093 %} 4094 4095 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4096 predicate(vector_length(n) == 4); 4097 match(Set dst (VectorInsert (Binary src val) idx)); 4098 effect(TEMP vtmp); 4099 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4100 ins_encode %{ 4101 assert(vector_element_basic_type(this) == T_LONG, ""); 4102 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4103 4104 uint x_idx = $idx$$constant & right_n_bits(1); 4105 uint y_idx = ($idx$$constant >> 1) & 1; 4106 int vlen_enc = Assembler::AVX_256bit; 4107 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4108 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4109 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4110 %} 4111 ins_pipe( pipe_slow ); 4112 %} 4113 4114 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4115 predicate(vector_length(n) == 8); 4116 match(Set dst (VectorInsert (Binary src val) idx)); 4117 effect(TEMP vtmp); 4118 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4119 ins_encode %{ 4120 assert(vector_element_basic_type(this) == T_LONG, "sanity"); 4121 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4122 4123 uint x_idx = $idx$$constant & right_n_bits(1); 4124 uint y_idx = ($idx$$constant >> 1) & 3; 4125 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4126 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4127 __ vinserti32x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4128 %} 4129 ins_pipe( pipe_slow ); 4130 %} 4131 #endif 4132 4133 instruct insertF(vec dst, regF val, immU8 idx) %{ 4134 predicate(vector_length(n) >= 2 && 4135 vector_length(n) <= 4); 4136 match(Set dst (VectorInsert (Binary dst val) idx)); 4137 format %{ "vector_insert $dst,$val,$idx" %} 4138 ins_encode %{ 4139 assert(UseSSE >= 4, "sanity"); 4140 4141 assert(vector_element_basic_type(this) == T_FLOAT, "sanity"); 4142 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4143 4144 __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); 4145 %} 4146 ins_pipe( pipe_slow ); 4147 %} 4148 4149 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4150 predicate(vector_length(n) >= 8); 4151 match(Set dst (VectorInsert (Binary src val) idx)); 4152 effect(TEMP vtmp); 4153 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as 
TEMP" %} 4154 ins_encode %{ 4155 assert(vector_element_basic_type(this) == T_FLOAT, "sanity"); 4156 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4157 4158 int vlen = vector_length(this); 4159 uint x_idx = $idx$$constant & right_n_bits(2); 4160 if (vlen == 8) { 4161 uint y_idx = ($idx$$constant >> 2) & 1; 4162 int vlen_enc = Assembler::AVX_256bit; 4163 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4164 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx); 4165 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4166 } else { 4167 assert(vlen == 16, "sanity"); 4168 uint y_idx = ($idx$$constant >> 2) & 3; 4169 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4170 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx); 4171 __ vinserti32x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4172 } 4173 %} 4174 ins_pipe( pipe_slow ); 4175 %} 4176 4177 #ifdef _LP64 4178 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4179 predicate(vector_length(n) == 2); 4180 match(Set dst (VectorInsert (Binary dst val) idx)); 4181 effect(TEMP tmp); 4182 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4183 ins_encode %{ 4184 assert(UseSSE >= 4, "sanity"); 4185 assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4186 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4187 4188 __ movq($tmp$$Register, $val$$XMMRegister); 4189 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4190 %} 4191 ins_pipe( pipe_slow ); 4192 %} 4193 4194 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4195 predicate(vector_length(n) == 4); 4196 match(Set dst (VectorInsert (Binary src val) idx)); 4197 effect(TEMP vtmp, TEMP tmp); 4198 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4199 ins_encode %{ 4200 assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4201 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4202 4203 uint x_idx = $idx$$constant & right_n_bits(1); 4204 uint y_idx = ($idx$$constant >> 1) & 1; 4205 int vlen_enc = Assembler::AVX_256bit; 4206 __ movq($tmp$$Register, $val$$XMMRegister); 4207 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4208 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4209 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4210 %} 4211 ins_pipe( pipe_slow ); 4212 %} 4213 4214 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4215 predicate(vector_length(n) == 8); 4216 match(Set dst (VectorInsert (Binary src val) idx)); 4217 effect(TEMP tmp, TEMP vtmp); 4218 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4219 ins_encode %{ 4220 assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4221 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4222 4223 uint x_idx = $idx$$constant & right_n_bits(1); 4224 uint y_idx = ($idx$$constant >> 1) & 3; 4225 __ movq($tmp$$Register, $val$$XMMRegister); 4226 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4227 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4228 __ vinserti32x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4229 %} 4230 ins_pipe( pipe_slow ); 4231 %} 4232 #endif 4233 4234 // ====================REDUCTION 
ARITHMETIC======================================= 4235 4236 // =======================Int Reduction========================================== 4237 4238 instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4239 predicate(vector_element_basic_type(n->in(2)) == T_INT && 4240 vector_length(n->in(2)) < 16); // src2 4241 match(Set dst (AddReductionVI src1 src2)); 4242 match(Set dst (MulReductionVI src1 src2)); 4243 match(Set dst (AndReductionV src1 src2)); 4244 match(Set dst ( OrReductionV src1 src2)); 4245 match(Set dst (XorReductionV src1 src2)); 4246 match(Set dst (MinReductionV src1 src2)); 4247 match(Set dst (MaxReductionV src1 src2)); 4248 effect(TEMP vtmp1, TEMP vtmp2); 4249 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4250 ins_encode %{ 4251 int opcode = this->ideal_Opcode(); 4252 int vlen = vector_length(this, $src2); 4253 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4254 %} 4255 ins_pipe( pipe_slow ); 4256 %} 4257 4258 instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4259 predicate(vector_element_basic_type(n->in(2)) == T_INT && 4260 vector_length(n->in(2)) == 16); // src2 4261 match(Set dst (AddReductionVI src1 src2)); 4262 match(Set dst (MulReductionVI src1 src2)); 4263 match(Set dst (AndReductionV src1 src2)); 4264 match(Set dst ( OrReductionV src1 src2)); 4265 match(Set dst (XorReductionV src1 src2)); 4266 match(Set dst (MinReductionV src1 src2)); 4267 match(Set dst (MaxReductionV src1 src2)); 4268 effect(TEMP vtmp1, TEMP vtmp2); 4269 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4270 ins_encode %{ 4271 int opcode = this->ideal_Opcode(); 4272 int vlen = vector_length(this, $src2); 4273 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4274 %} 4275 ins_pipe( pipe_slow ); 4276 %} 4277 4278 // =======================Long Reduction========================================== 4279 4280 #ifdef _LP64 4281 instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4282 predicate(vector_element_basic_type(n->in(2)) == T_LONG && 4283 vector_length(n->in(2)) < 8); // src2 4284 match(Set dst (AddReductionVL src1 src2)); 4285 match(Set dst (MulReductionVL src1 src2)); 4286 match(Set dst (AndReductionV src1 src2)); 4287 match(Set dst ( OrReductionV src1 src2)); 4288 match(Set dst (XorReductionV src1 src2)); 4289 match(Set dst (MinReductionV src1 src2)); 4290 match(Set dst (MaxReductionV src1 src2)); 4291 effect(TEMP vtmp1, TEMP vtmp2); 4292 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4293 ins_encode %{ 4294 int opcode = this->ideal_Opcode(); 4295 int vlen = vector_length(this, $src2); 4296 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4297 %} 4298 ins_pipe( pipe_slow ); 4299 %} 4300 4301 instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4302 predicate(vector_element_basic_type(n->in(2)) == T_LONG && 4303 vector_length(n->in(2)) == 8); // src2 4304 match(Set dst (AddReductionVL src1 src2)); 4305 match(Set dst (MulReductionVL src1 src2)); 4306 match(Set dst (AndReductionV src1 src2)); 4307 match(Set dst ( OrReductionV src1 src2)); 4308 match(Set dst (XorReductionV src1 src2)); 4309 match(Set dst (MinReductionV src1 src2)); 4310 match(Set dst 
(MaxReductionV src1 src2)); 4311 effect(TEMP vtmp1, TEMP vtmp2); 4312 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4313 ins_encode %{ 4314 int opcode = this->ideal_Opcode(); 4315 int vlen = vector_length(this, $src2); 4316 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4317 %} 4318 ins_pipe( pipe_slow ); 4319 %} 4320 #endif // _LP64 4321 4322 // =======================Float Reduction========================================== 4323 4324 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 4325 predicate(vector_length(n->in(2)) <= 4); // src 4326 match(Set dst (AddReductionVF dst src)); 4327 match(Set dst (MulReductionVF dst src)); 4328 effect(TEMP dst, TEMP vtmp); 4329 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 4330 ins_encode %{ 4331 int opcode = this->ideal_Opcode(); 4332 int vlen = vector_length(this, $src); 4333 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4334 %} 4335 ins_pipe( pipe_slow ); 4336 %} 4337 4338 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4339 predicate(vector_length(n->in(2)) == 8); // src 4340 match(Set dst (AddReductionVF dst src)); 4341 match(Set dst (MulReductionVF dst src)); 4342 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4343 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4344 ins_encode %{ 4345 int opcode = this->ideal_Opcode(); 4346 int vlen = vector_length(this, $src); 4347 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4348 %} 4349 ins_pipe( pipe_slow ); 4350 %} 4351 4352 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4353 predicate(vector_length(n->in(2)) == 16); // src 4354 match(Set dst (AddReductionVF dst src)); 4355 match(Set dst (MulReductionVF dst src)); 4356 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4357 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4358 ins_encode %{ 4359 int opcode = this->ideal_Opcode(); 4360 int vlen = vector_length(this, $src); 4361 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4362 %} 4363 ins_pipe( pipe_slow ); 4364 %} 4365 4366 // =======================Double Reduction========================================== 4367 4368 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 4369 predicate(vector_length(n->in(2)) == 2); // src 4370 match(Set dst (AddReductionVD dst src)); 4371 match(Set dst (MulReductionVD dst src)); 4372 effect(TEMP dst, TEMP vtmp); 4373 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 4374 ins_encode %{ 4375 int opcode = this->ideal_Opcode(); 4376 int vlen = vector_length(this, $src); 4377 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4378 %} 4379 ins_pipe( pipe_slow ); 4380 %} 4381 4382 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 4383 predicate(vector_length(n->in(2)) == 4); // src 4384 match(Set dst (AddReductionVD dst src)); 4385 match(Set dst (MulReductionVD dst src)); 4386 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4387 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4388 ins_encode %{ 4389 int opcode = this->ideal_Opcode(); 4390 int vlen = vector_length(this, $src); 4391 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4392 %} 
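  // The FP add/mul reductions keep the running scalar in $dst (note that $dst is both an input
  // and the result in the match rules above); reduce_fp folds the lanes of $src into it
  // sequentially rather than combining them as a tree.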
4393 ins_pipe( pipe_slow ); 4394 %} 4395 4396 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4397 predicate(vector_length(n->in(2)) == 8); // src 4398 match(Set dst (AddReductionVD dst src)); 4399 match(Set dst (MulReductionVD dst src)); 4400 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4401 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4402 ins_encode %{ 4403 int opcode = this->ideal_Opcode(); 4404 int vlen = vector_length(this, $src); 4405 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4406 %} 4407 ins_pipe( pipe_slow ); 4408 %} 4409 4410 // =======================Byte Reduction========================================== 4411 4412 #ifdef _LP64 4413 instruct reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4414 predicate(vector_element_basic_type(n->in(2)) == T_BYTE && 4415 vector_length(n->in(2)) <= 32); // src2 4416 match(Set dst (AddReductionVI src1 src2)); 4417 match(Set dst (AndReductionV src1 src2)); 4418 match(Set dst ( OrReductionV src1 src2)); 4419 match(Set dst (XorReductionV src1 src2)); 4420 match(Set dst (MinReductionV src1 src2)); 4421 match(Set dst (MaxReductionV src1 src2)); 4422 effect(TEMP vtmp1, TEMP vtmp2); 4423 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4424 ins_encode %{ 4425 int opcode = this->ideal_Opcode(); 4426 int vlen = vector_length(this, $src2); 4427 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4428 %} 4429 ins_pipe( pipe_slow ); 4430 %} 4431 4432 instruct reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4433 predicate(vector_element_basic_type(n->in(2)) == T_BYTE && 4434 vector_length(n->in(2)) == 64); // src2 4435 match(Set dst (AddReductionVI src1 src2)); 4436 match(Set dst (AndReductionV src1 src2)); 4437 match(Set dst ( OrReductionV src1 src2)); 4438 match(Set dst (XorReductionV src1 src2)); 4439 match(Set dst (MinReductionV src1 src2)); 4440 match(Set dst (MaxReductionV src1 src2)); 4441 effect(TEMP vtmp1, TEMP vtmp2); 4442 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4443 ins_encode %{ 4444 int opcode = this->ideal_Opcode(); 4445 int vlen = vector_length(this, $src2); 4446 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4447 %} 4448 ins_pipe( pipe_slow ); 4449 %} 4450 #endif 4451 4452 // =======================Short Reduction========================================== 4453 4454 instruct reductionS(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4455 predicate(vector_element_basic_type(n->in(2)) == T_SHORT && 4456 vector_length(n->in(2)) <= 16); // src2 4457 match(Set dst (AddReductionVI src1 src2)); 4458 match(Set dst (MulReductionVI src1 src2)); 4459 match(Set dst (AndReductionV src1 src2)); 4460 match(Set dst ( OrReductionV src1 src2)); 4461 match(Set dst (XorReductionV src1 src2)); 4462 match(Set dst (MinReductionV src1 src2)); 4463 match(Set dst (MaxReductionV src1 src2)); 4464 effect(TEMP vtmp1, TEMP vtmp2); 4465 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4466 ins_encode %{ 4467 int opcode = this->ideal_Opcode(); 4468 int vlen = vector_length(this, $src2); 4469 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4470 %} 4471 ins_pipe( 
pipe_slow ); 4472 %} 4473 4474 instruct reduction32S(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4475 predicate(vector_element_basic_type(n->in(2)) == T_SHORT && 4476 vector_length(n->in(2)) == 32); // src2 4477 match(Set dst (AddReductionVI src1 src2)); 4478 match(Set dst (MulReductionVI src1 src2)); 4479 match(Set dst (AndReductionV src1 src2)); 4480 match(Set dst ( OrReductionV src1 src2)); 4481 match(Set dst (XorReductionV src1 src2)); 4482 match(Set dst (MinReductionV src1 src2)); 4483 match(Set dst (MaxReductionV src1 src2)); 4484 effect(TEMP vtmp1, TEMP vtmp2); 4485 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4486 ins_encode %{ 4487 int opcode = this->ideal_Opcode(); 4488 int vlen = vector_length(this, $src2); 4489 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4490 %} 4491 ins_pipe( pipe_slow ); 4492 %} 4493 4494 // =======================Mul Reduction========================================== 4495 4496 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4497 predicate(vector_element_basic_type(n->in(2)) == T_BYTE && 4498 vector_length(n->in(2)) <= 32); // src2 4499 match(Set dst (MulReductionVI src1 src2)); 4500 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4501 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4502 ins_encode %{ 4503 int opcode = this->ideal_Opcode(); 4504 int vlen = vector_length(this, $src2); 4505 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4506 %} 4507 ins_pipe( pipe_slow ); 4508 %} 4509 4510 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4511 predicate(vector_element_basic_type(n->in(2)) == T_BYTE && 4512 vector_length(n->in(2)) == 64); // src2 4513 match(Set dst (MulReductionVI src1 src2)); 4514 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4515 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4516 ins_encode %{ 4517 int opcode = this->ideal_Opcode(); 4518 int vlen = vector_length(this, $src2); 4519 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4520 %} 4521 ins_pipe( pipe_slow ); 4522 %} 4523 4524 //--------------------Min/Max Float Reduction -------------------- 4525 // Float Min Reduction 4526 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 4527 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4528 predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && 4529 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4530 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4531 vector_length(n->in(2)) == 2); 4532 match(Set dst (MinReductionV src1 src2)); 4533 match(Set dst (MaxReductionV src1 src2)); 4534 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4535 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4536 ins_encode %{ 4537 assert(UseAVX > 0, "sanity"); 4538 4539 int opcode = this->ideal_Opcode(); 4540 int vlen = vector_length(this, $src2); 4541 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4542 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 4543 %} 4544 ins_pipe( pipe_slow ); 4545 
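// The immF src1 operand in this rule (and in minmax_reductionF below) is
// constrained by the predicate to be the reduction identity: +Inf for a min
// reduction, -Inf for a max reduction.  That lets the generated code ignore
// the scalar input and fold only the vector lanes.  Java-style sketch of the
// assumed semantics (illustration only; reduceFloatMinMax() also handles the
// NaN and -0.0 cases that Math.min/max require):
//
//   float r = Float.POSITIVE_INFINITY;      // identity for a min reduction
//   for (int i = 0; i < vlen; i++) {
//     r = Math.min(r, src2[i]);             // == min over src2 alone
//   }
//   dst = r;
//
// The *_av variants further below cover the general case where the scalar
// input is a live accumulator rather than the identity value.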
%} 4546 4547 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 4548 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 4549 predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && 4550 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4551 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4552 vector_length(n->in(2)) >= 4); 4553 match(Set dst (MinReductionV src1 src2)); 4554 match(Set dst (MaxReductionV src1 src2)); 4555 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 4556 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 4557 ins_encode %{ 4558 assert(UseAVX > 0, "sanity"); 4559 4560 int opcode = this->ideal_Opcode(); 4561 int vlen = vector_length(this, $src2); 4562 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4563 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 4564 %} 4565 ins_pipe( pipe_slow ); 4566 %} 4567 4568 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 4569 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4570 predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && 4571 vector_length(n->in(2)) == 2); 4572 match(Set dst (MinReductionV dst src)); 4573 match(Set dst (MaxReductionV dst src)); 4574 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4575 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4576 ins_encode %{ 4577 assert(UseAVX > 0, "sanity"); 4578 4579 int opcode = this->ideal_Opcode(); 4580 int vlen = vector_length(this, $src); 4581 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 4582 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 4583 %} 4584 ins_pipe( pipe_slow ); 4585 %} 4586 4587 4588 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 4589 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 4590 predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && 4591 vector_length(n->in(2)) >= 4); 4592 match(Set dst (MinReductionV dst src)); 4593 match(Set dst (MaxReductionV dst src)); 4594 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 4595 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 4596 ins_encode %{ 4597 assert(UseAVX > 0, "sanity"); 4598 4599 int opcode = this->ideal_Opcode(); 4600 int vlen = vector_length(this, $src); 4601 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 4602 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 4603 %} 4604 ins_pipe( pipe_slow ); 4605 %} 4606 4607 4608 //--------------------Min Double Reduction -------------------- 4609 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 4610 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 4611 rFlagsReg cr) %{ 4612 predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && 4613 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 4614 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 4615 vector_length(n->in(2)) == 2); 4616 match(Set dst (MinReductionV src1 src2)); 4617 match(Set dst (MaxReductionV src1 src2)); 4618 effect(TEMP dst, TEMP tmp1, TEMP tmp2, 
TEMP tmp3, TEMP tmp4, KILL cr); 4619 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 4620 ins_encode %{ 4621 assert(UseAVX > 0, "sanity"); 4622 4623 int opcode = this->ideal_Opcode(); 4624 int vlen = vector_length(this, $src2); 4625 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 4626 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 4627 %} 4628 ins_pipe( pipe_slow ); 4629 %} 4630 4631 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 4632 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 4633 rFlagsReg cr) %{ 4634 predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && 4635 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 4636 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 4637 vector_length(n->in(2)) >= 4); 4638 match(Set dst (MinReductionV src1 src2)); 4639 match(Set dst (MaxReductionV src1 src2)); 4640 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 4641 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 4642 ins_encode %{ 4643 assert(UseAVX > 0, "sanity"); 4644 4645 int opcode = this->ideal_Opcode(); 4646 int vlen = vector_length(this, $src2); 4647 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 4648 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 4649 %} 4650 ins_pipe( pipe_slow ); 4651 %} 4652 4653 4654 instruct minmax_reduction2D_av(legRegD dst, legVec src, 4655 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 4656 rFlagsReg cr) %{ 4657 predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && 4658 vector_length(n->in(2)) == 2); 4659 match(Set dst (MinReductionV dst src)); 4660 match(Set dst (MaxReductionV dst src)); 4661 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 4662 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 4663 ins_encode %{ 4664 assert(UseAVX > 0, "sanity"); 4665 4666 int opcode = this->ideal_Opcode(); 4667 int vlen = vector_length(this, $src); 4668 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 4669 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 4670 %} 4671 ins_pipe( pipe_slow ); 4672 %} 4673 4674 instruct minmax_reductionD_av(legRegD dst, legVec src, 4675 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 4676 rFlagsReg cr) %{ 4677 predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && 4678 vector_length(n->in(2)) >= 4); 4679 match(Set dst (MinReductionV dst src)); 4680 match(Set dst (MaxReductionV dst src)); 4681 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 4682 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 4683 ins_encode %{ 4684 assert(UseAVX > 0, "sanity"); 4685 4686 int opcode = this->ideal_Opcode(); 4687 int vlen = vector_length(this, $src); 4688 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 4689 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 4690 %} 4691 ins_pipe( pipe_slow ); 4692 %} 4693 4694 // ====================VECTOR ARITHMETIC======================================= 4695 4696 // 
--------------------------------- ADD -------------------------------------- 4697 4698 // Bytes vector add 4699 instruct vaddB(vec dst, vec src) %{ 4700 predicate(UseAVX == 0); 4701 match(Set dst (AddVB dst src)); 4702 format %{ "paddb $dst,$src\t! add packedB" %} 4703 ins_encode %{ 4704 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 4705 %} 4706 ins_pipe( pipe_slow ); 4707 %} 4708 4709 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 4710 predicate(UseAVX > 0); 4711 match(Set dst (AddVB src1 src2)); 4712 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 4713 ins_encode %{ 4714 int vlen_enc = vector_length_encoding(this); 4715 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4716 %} 4717 ins_pipe( pipe_slow ); 4718 %} 4719 4720 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 4721 predicate(UseAVX > 0); 4722 match(Set dst (AddVB src (LoadVector mem))); 4723 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 4724 ins_encode %{ 4725 int vlen_enc = vector_length_encoding(this); 4726 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4727 %} 4728 ins_pipe( pipe_slow ); 4729 %} 4730 4731 // Shorts/Chars vector add 4732 instruct vaddS(vec dst, vec src) %{ 4733 predicate(UseAVX == 0); 4734 match(Set dst (AddVS dst src)); 4735 format %{ "paddw $dst,$src\t! add packedS" %} 4736 ins_encode %{ 4737 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 4738 %} 4739 ins_pipe( pipe_slow ); 4740 %} 4741 4742 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 4743 predicate(UseAVX > 0); 4744 match(Set dst (AddVS src1 src2)); 4745 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 4746 ins_encode %{ 4747 int vlen_enc = vector_length_encoding(this); 4748 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4749 %} 4750 ins_pipe( pipe_slow ); 4751 %} 4752 4753 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 4754 predicate(UseAVX > 0); 4755 match(Set dst (AddVS src (LoadVector mem))); 4756 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 4757 ins_encode %{ 4758 int vlen_enc = vector_length_encoding(this); 4759 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4760 %} 4761 ins_pipe( pipe_slow ); 4762 %} 4763 4764 // Integers vector add 4765 instruct vaddI(vec dst, vec src) %{ 4766 predicate(UseAVX == 0); 4767 match(Set dst (AddVI dst src)); 4768 format %{ "paddd $dst,$src\t! add packedI" %} 4769 ins_encode %{ 4770 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 4771 %} 4772 ins_pipe( pipe_slow ); 4773 %} 4774 4775 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 4776 predicate(UseAVX > 0); 4777 match(Set dst (AddVI src1 src2)); 4778 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 4779 ins_encode %{ 4780 int vlen_enc = vector_length_encoding(this); 4781 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4782 %} 4783 ins_pipe( pipe_slow ); 4784 %} 4785 4786 4787 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 4788 predicate(UseAVX > 0); 4789 match(Set dst (AddVI src (LoadVector mem))); 4790 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 4791 ins_encode %{ 4792 int vlen_enc = vector_length_encoding(this); 4793 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4794 %} 4795 ins_pipe( pipe_slow ); 4796 %} 4797 4798 // Longs vector add 4799 instruct vaddL(vec dst, vec src) %{ 4800 predicate(UseAVX == 0); 4801 match(Set dst (AddVL dst src)); 4802 format %{ "paddq $dst,$src\t! 
add packedL" %} 4803 ins_encode %{ 4804 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 4805 %} 4806 ins_pipe( pipe_slow ); 4807 %} 4808 4809 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 4810 predicate(UseAVX > 0); 4811 match(Set dst (AddVL src1 src2)); 4812 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 4813 ins_encode %{ 4814 int vlen_enc = vector_length_encoding(this); 4815 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4816 %} 4817 ins_pipe( pipe_slow ); 4818 %} 4819 4820 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 4821 predicate(UseAVX > 0); 4822 match(Set dst (AddVL src (LoadVector mem))); 4823 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 4824 ins_encode %{ 4825 int vlen_enc = vector_length_encoding(this); 4826 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4827 %} 4828 ins_pipe( pipe_slow ); 4829 %} 4830 4831 // Floats vector add 4832 instruct vaddF(vec dst, vec src) %{ 4833 predicate(UseAVX == 0); 4834 match(Set dst (AddVF dst src)); 4835 format %{ "addps $dst,$src\t! add packedF" %} 4836 ins_encode %{ 4837 __ addps($dst$$XMMRegister, $src$$XMMRegister); 4838 %} 4839 ins_pipe( pipe_slow ); 4840 %} 4841 4842 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 4843 predicate(UseAVX > 0); 4844 match(Set dst (AddVF src1 src2)); 4845 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 4846 ins_encode %{ 4847 int vlen_enc = vector_length_encoding(this); 4848 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4849 %} 4850 ins_pipe( pipe_slow ); 4851 %} 4852 4853 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 4854 predicate(UseAVX > 0); 4855 match(Set dst (AddVF src (LoadVector mem))); 4856 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 4857 ins_encode %{ 4858 int vlen_enc = vector_length_encoding(this); 4859 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4860 %} 4861 ins_pipe( pipe_slow ); 4862 %} 4863 4864 // Doubles vector add 4865 instruct vaddD(vec dst, vec src) %{ 4866 predicate(UseAVX == 0); 4867 match(Set dst (AddVD dst src)); 4868 format %{ "addpd $dst,$src\t! add packedD" %} 4869 ins_encode %{ 4870 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 4871 %} 4872 ins_pipe( pipe_slow ); 4873 %} 4874 4875 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 4876 predicate(UseAVX > 0); 4877 match(Set dst (AddVD src1 src2)); 4878 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 4879 ins_encode %{ 4880 int vlen_enc = vector_length_encoding(this); 4881 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4882 %} 4883 ins_pipe( pipe_slow ); 4884 %} 4885 4886 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 4887 predicate(UseAVX > 0); 4888 match(Set dst (AddVD src (LoadVector mem))); 4889 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 4890 ins_encode %{ 4891 int vlen_enc = vector_length_encoding(this); 4892 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4893 %} 4894 ins_pipe( pipe_slow ); 4895 %} 4896 4897 // --------------------------------- SUB -------------------------------------- 4898 4899 // Bytes vector sub 4900 instruct vsubB(vec dst, vec src) %{ 4901 predicate(UseAVX == 0); 4902 match(Set dst (SubVB dst src)); 4903 format %{ "psubb $dst,$src\t! 
sub packedB" %} 4904 ins_encode %{ 4905 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 4906 %} 4907 ins_pipe( pipe_slow ); 4908 %} 4909 4910 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 4911 predicate(UseAVX > 0); 4912 match(Set dst (SubVB src1 src2)); 4913 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 4914 ins_encode %{ 4915 int vlen_enc = vector_length_encoding(this); 4916 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4917 %} 4918 ins_pipe( pipe_slow ); 4919 %} 4920 4921 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 4922 predicate(UseAVX > 0); 4923 match(Set dst (SubVB src (LoadVector mem))); 4924 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 4925 ins_encode %{ 4926 int vlen_enc = vector_length_encoding(this); 4927 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4928 %} 4929 ins_pipe( pipe_slow ); 4930 %} 4931 4932 // Shorts/Chars vector sub 4933 instruct vsubS(vec dst, vec src) %{ 4934 predicate(UseAVX == 0); 4935 match(Set dst (SubVS dst src)); 4936 format %{ "psubw $dst,$src\t! sub packedS" %} 4937 ins_encode %{ 4938 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 4939 %} 4940 ins_pipe( pipe_slow ); 4941 %} 4942 4943 4944 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 4945 predicate(UseAVX > 0); 4946 match(Set dst (SubVS src1 src2)); 4947 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 4948 ins_encode %{ 4949 int vlen_enc = vector_length_encoding(this); 4950 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4951 %} 4952 ins_pipe( pipe_slow ); 4953 %} 4954 4955 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 4956 predicate(UseAVX > 0); 4957 match(Set dst (SubVS src (LoadVector mem))); 4958 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 4959 ins_encode %{ 4960 int vlen_enc = vector_length_encoding(this); 4961 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4962 %} 4963 ins_pipe( pipe_slow ); 4964 %} 4965 4966 // Integers vector sub 4967 instruct vsubI(vec dst, vec src) %{ 4968 predicate(UseAVX == 0); 4969 match(Set dst (SubVI dst src)); 4970 format %{ "psubd $dst,$src\t! sub packedI" %} 4971 ins_encode %{ 4972 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 4973 %} 4974 ins_pipe( pipe_slow ); 4975 %} 4976 4977 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 4978 predicate(UseAVX > 0); 4979 match(Set dst (SubVI src1 src2)); 4980 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 4981 ins_encode %{ 4982 int vlen_enc = vector_length_encoding(this); 4983 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4984 %} 4985 ins_pipe( pipe_slow ); 4986 %} 4987 4988 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 4989 predicate(UseAVX > 0); 4990 match(Set dst (SubVI src (LoadVector mem))); 4991 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 4992 ins_encode %{ 4993 int vlen_enc = vector_length_encoding(this); 4994 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4995 %} 4996 ins_pipe( pipe_slow ); 4997 %} 4998 4999 // Longs vector sub 5000 instruct vsubL(vec dst, vec src) %{ 5001 predicate(UseAVX == 0); 5002 match(Set dst (SubVL dst src)); 5003 format %{ "psubq $dst,$src\t! sub packedL" %} 5004 ins_encode %{ 5005 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5006 %} 5007 ins_pipe( pipe_slow ); 5008 %} 5009 5010 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5011 predicate(UseAVX > 0); 5012 match(Set dst (SubVL src1 src2)); 5013 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5014 ins_encode %{ 5015 int vlen_enc = vector_length_encoding(this); 5016 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5017 %} 5018 ins_pipe( pipe_slow ); 5019 %} 5020 5021 5022 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5023 predicate(UseAVX > 0); 5024 match(Set dst (SubVL src (LoadVector mem))); 5025 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5026 ins_encode %{ 5027 int vlen_enc = vector_length_encoding(this); 5028 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5029 %} 5030 ins_pipe( pipe_slow ); 5031 %} 5032 5033 // Floats vector sub 5034 instruct vsubF(vec dst, vec src) %{ 5035 predicate(UseAVX == 0); 5036 match(Set dst (SubVF dst src)); 5037 format %{ "subps $dst,$src\t! sub packedF" %} 5038 ins_encode %{ 5039 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5040 %} 5041 ins_pipe( pipe_slow ); 5042 %} 5043 5044 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5045 predicate(UseAVX > 0); 5046 match(Set dst (SubVF src1 src2)); 5047 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5048 ins_encode %{ 5049 int vlen_enc = vector_length_encoding(this); 5050 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5051 %} 5052 ins_pipe( pipe_slow ); 5053 %} 5054 5055 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5056 predicate(UseAVX > 0); 5057 match(Set dst (SubVF src (LoadVector mem))); 5058 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5059 ins_encode %{ 5060 int vlen_enc = vector_length_encoding(this); 5061 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5062 %} 5063 ins_pipe( pipe_slow ); 5064 %} 5065 5066 // Doubles vector sub 5067 instruct vsubD(vec dst, vec src) %{ 5068 predicate(UseAVX == 0); 5069 match(Set dst (SubVD dst src)); 5070 format %{ "subpd $dst,$src\t! sub packedD" %} 5071 ins_encode %{ 5072 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5073 %} 5074 ins_pipe( pipe_slow ); 5075 %} 5076 5077 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5078 predicate(UseAVX > 0); 5079 match(Set dst (SubVD src1 src2)); 5080 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5081 ins_encode %{ 5082 int vlen_enc = vector_length_encoding(this); 5083 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5084 %} 5085 ins_pipe( pipe_slow ); 5086 %} 5087 5088 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5089 predicate(UseAVX > 0); 5090 match(Set dst (SubVD src (LoadVector mem))); 5091 format %{ "vsubpd $dst,$src,$mem\t! 
sub packedD" %} 5092 ins_encode %{ 5093 int vlen_enc = vector_length_encoding(this); 5094 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5095 %} 5096 ins_pipe( pipe_slow ); 5097 %} 5098 5099 // --------------------------------- MUL -------------------------------------- 5100 5101 // Byte vector mul 5102 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5103 predicate(vector_length(n) == 4 || 5104 vector_length(n) == 8); 5105 match(Set dst (MulVB src1 src2)); 5106 effect(TEMP dst, TEMP tmp, TEMP scratch); 5107 format %{"vector_mulB $dst,$src1,$src2" %} 5108 ins_encode %{ 5109 assert(UseSSE > 3, "required"); 5110 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 5111 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 5112 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 5113 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5114 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 5115 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5116 %} 5117 ins_pipe( pipe_slow ); 5118 %} 5119 5120 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5121 predicate(vector_length(n) == 16 && UseAVX <= 1); 5122 match(Set dst (MulVB src1 src2)); 5123 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5124 format %{"vector_mulB $dst,$src1,$src2" %} 5125 ins_encode %{ 5126 assert(UseSSE > 3, "required"); 5127 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 5128 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 5129 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 5130 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 5131 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 5132 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 5133 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 5134 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 5135 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5136 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 5137 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 5138 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 5139 %} 5140 ins_pipe( pipe_slow ); 5141 %} 5142 5143 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5144 predicate(vector_length(n) == 16 && UseAVX > 1); 5145 match(Set dst (MulVB src1 src2)); 5146 effect(TEMP dst, TEMP tmp, TEMP scratch); 5147 format %{"vector_mulB $dst,$src1,$src2" %} 5148 ins_encode %{ 5149 int vlen_enc = Assembler::AVX_256bit; 5150 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5151 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5152 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5153 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5154 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5155 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 5156 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 5157 %} 5158 ins_pipe( pipe_slow ); 5159 %} 5160 5161 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5162 predicate(vector_length(n) == 32); 5163 match(Set dst (MulVB src1 src2)); 5164 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5165 format %{"vector_mulB $dst,$src1,$src2" %} 5166 ins_encode %{ 5167 assert(UseAVX > 1, "required"); 5168 int vlen_enc = Assembler::AVX_256bit; 5169 __ 
vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5170 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 5171 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5172 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5173 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5174 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5175 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5176 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5177 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5178 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5179 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5180 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5181 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5182 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 5183 %} 5184 ins_pipe( pipe_slow ); 5185 %} 5186 5187 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5188 predicate(vector_length(n) == 64); 5189 match(Set dst (MulVB src1 src2)); 5190 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5191 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 5192 ins_encode %{ 5193 assert(UseAVX > 2, "required"); 5194 int vlen_enc = Assembler::AVX_512bit; 5195 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5196 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 5197 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5198 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5199 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5200 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5201 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5202 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5203 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5204 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5205 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5206 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5207 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5208 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); 5209 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5210 %} 5211 ins_pipe( pipe_slow ); 5212 %} 5213 5214 // Shorts/Chars vector mul 5215 instruct vmulS(vec dst, vec src) %{ 5216 predicate(UseAVX == 0); 5217 match(Set dst (MulVS dst src)); 5218 format %{ "pmullw $dst,$src\t! mul packedS" %} 5219 ins_encode %{ 5220 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5221 %} 5222 ins_pipe( pipe_slow ); 5223 %} 5224 5225 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5226 predicate(UseAVX > 0); 5227 match(Set dst (MulVS src1 src2)); 5228 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packedS" %} 5229 ins_encode %{ 5230 int vlen_enc = vector_length_encoding(this); 5231 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5232 %} 5233 ins_pipe( pipe_slow ); 5234 %} 5235 5236 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5237 predicate(UseAVX > 0); 5238 match(Set dst (MulVS src (LoadVector mem))); 5239 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 5240 ins_encode %{ 5241 int vlen_enc = vector_length_encoding(this); 5242 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5243 %} 5244 ins_pipe( pipe_slow ); 5245 %} 5246 5247 // Integers vector mul 5248 instruct vmulI(vec dst, vec src) %{ 5249 predicate(UseAVX == 0); 5250 match(Set dst (MulVI dst src)); 5251 format %{ "pmulld $dst,$src\t! mul packedI" %} 5252 ins_encode %{ 5253 assert(UseSSE > 3, "required"); 5254 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5255 %} 5256 ins_pipe( pipe_slow ); 5257 %} 5258 5259 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5260 predicate(UseAVX > 0); 5261 match(Set dst (MulVI src1 src2)); 5262 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 5263 ins_encode %{ 5264 int vlen_enc = vector_length_encoding(this); 5265 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5266 %} 5267 ins_pipe( pipe_slow ); 5268 %} 5269 5270 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 5271 predicate(UseAVX > 0); 5272 match(Set dst (MulVI src (LoadVector mem))); 5273 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 5274 ins_encode %{ 5275 int vlen_enc = vector_length_encoding(this); 5276 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5277 %} 5278 ins_pipe( pipe_slow ); 5279 %} 5280 5281 // Longs vector mul 5282 instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 5283 predicate(VM_Version::supports_avx512dq()); 5284 match(Set dst (MulVL src1 src2)); 5285 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 5286 ins_encode %{ 5287 assert(UseAVX > 2, "required"); 5288 int vlen_enc = vector_length_encoding(this); 5289 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5290 %} 5291 ins_pipe( pipe_slow ); 5292 %} 5293 5294 instruct vmulL_mem(vec dst, vec src, memory mem) %{ 5295 predicate(VM_Version::supports_avx512dq()); 5296 match(Set dst (MulVL src (LoadVector mem))); 5297 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 5298 ins_encode %{ 5299 assert(UseAVX > 2, "required"); 5300 int vlen_enc = vector_length_encoding(this); 5301 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5302 %} 5303 ins_pipe( pipe_slow ); 5304 %} 5305 5306 instruct mul2L_reg(vec dst, vec src2, vec tmp) %{ 5307 predicate(vector_length(n) == 2 && !VM_Version::supports_avx512dq()); 5308 match(Set dst (MulVL dst src2)); 5309 effect(TEMP dst, TEMP tmp); 5310 format %{ "pshufd $tmp,$src2, 177\n\t" 5311 "pmulld $tmp,$dst\n\t" 5312 "phaddd $tmp,$tmp\n\t" 5313 "pmovzxdq $tmp,$tmp\n\t" 5314 "psllq $tmp, 32\n\t" 5315 "pmuludq $dst,$src2\n\t" 5316 "paddq $dst,$tmp\n\t! 
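// Without AVX-512DQ there is no packed 64x64->64 multiply, so this rule (and
// the 4-lane AVX variant below) builds it from 32-bit multiplies.  Sketch of
// the identity used per 64-bit lane (Java-style, illustration only):
//
//   long lo(long v) { return v & 0xFFFFFFFFL; }
//   long hi(long v) { return v >>> 32; }
//   // a * b  ==  lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)   (mod 2^64)
//
// In the encoding below, pshufd/pmulld/phaddd produce the cross terms
// lo(a)*hi(b) + hi(a)*lo(b), psllq shifts them into the upper half, pmuludq
// computes the unsigned lo(a)*lo(b) product, and paddq adds the two parts.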
mul packed2L" %} 5317 5318 ins_encode %{ 5319 assert(VM_Version::supports_sse4_1(), "required"); 5320 int vlen_enc = Assembler::AVX_128bit; 5321 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); 5322 __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); 5323 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5324 __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); 5325 __ psllq($tmp$$XMMRegister, 32); 5326 __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); 5327 __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); 5328 %} 5329 ins_pipe( pipe_slow ); 5330 %} 5331 5332 instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, vec tmp, vec tmp1) %{ 5333 predicate(vector_length(n) == 4 && !VM_Version::supports_avx512dq()); 5334 match(Set dst (MulVL src1 src2)); 5335 effect(TEMP tmp1, TEMP tmp); 5336 format %{ "vpshufd $tmp,$src2\n\t" 5337 "vpmulld $tmp,$src1,$tmp\n\t" 5338 "vphaddd $tmp,$tmp,$tmp\n\t" 5339 "vpmovzxdq $tmp,$tmp\n\t" 5340 "vpsllq $tmp,$tmp\n\t" 5341 "vpmuludq $tmp1,$src1,$src2\n\t" 5342 "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %} 5343 ins_encode %{ 5344 int vlen_enc = Assembler::AVX_256bit; 5345 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc); 5346 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5347 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 5348 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5349 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5350 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc); 5351 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5352 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5353 %} 5354 ins_pipe( pipe_slow ); 5355 %} 5356 5357 // Floats vector mul 5358 instruct vmulF(vec dst, vec src) %{ 5359 predicate(UseAVX == 0); 5360 match(Set dst (MulVF dst src)); 5361 format %{ "mulps $dst,$src\t! mul packedF" %} 5362 ins_encode %{ 5363 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 5364 %} 5365 ins_pipe( pipe_slow ); 5366 %} 5367 5368 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 5369 predicate(UseAVX > 0); 5370 match(Set dst (MulVF src1 src2)); 5371 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 5372 ins_encode %{ 5373 int vlen_enc = vector_length_encoding(this); 5374 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5375 %} 5376 ins_pipe( pipe_slow ); 5377 %} 5378 5379 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 5380 predicate(UseAVX > 0); 5381 match(Set dst (MulVF src (LoadVector mem))); 5382 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 5383 ins_encode %{ 5384 int vlen_enc = vector_length_encoding(this); 5385 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5386 %} 5387 ins_pipe( pipe_slow ); 5388 %} 5389 5390 // Doubles vector mul 5391 instruct vmulD(vec dst, vec src) %{ 5392 predicate(UseAVX == 0); 5393 match(Set dst (MulVD dst src)); 5394 format %{ "mulpd $dst,$src\t! mul packedD" %} 5395 ins_encode %{ 5396 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 5397 %} 5398 ins_pipe( pipe_slow ); 5399 %} 5400 5401 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 5402 predicate(UseAVX > 0); 5403 match(Set dst (MulVD src1 src2)); 5404 format %{ "vmulpd $dst,$src1,$src2\t! 
mul packedD" %} 5405 ins_encode %{ 5406 int vlen_enc = vector_length_encoding(this); 5407 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5408 %} 5409 ins_pipe( pipe_slow ); 5410 %} 5411 5412 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 5413 predicate(UseAVX > 0); 5414 match(Set dst (MulVD src (LoadVector mem))); 5415 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 5416 ins_encode %{ 5417 int vlen_enc = vector_length_encoding(this); 5418 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5419 %} 5420 ins_pipe( pipe_slow ); 5421 %} 5422 5423 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5424 predicate(vector_length(n) == 8); 5425 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 5426 effect(TEMP dst, USE src1, USE src2); 5427 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 5428 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 5429 %} 5430 ins_encode %{ 5431 assert(UseAVX > 0, "required"); 5432 5433 int vlen_enc = Assembler::AVX_256bit; 5434 int cond = (Assembler::Condition)($copnd$$cmpcode); 5435 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5436 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5437 %} 5438 ins_pipe( pipe_slow ); 5439 %} 5440 5441 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5442 predicate(vector_length(n) == 4); 5443 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 5444 effect(TEMP dst, USE src1, USE src2); 5445 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 5446 "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 5447 %} 5448 ins_encode %{ 5449 assert(UseAVX > 0, "required"); 5450 5451 int vlen_enc = Assembler::AVX_256bit; 5452 int cond = (Assembler::Condition)($copnd$$cmpcode); 5453 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5454 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5455 %} 5456 ins_pipe( pipe_slow ); 5457 %} 5458 5459 // --------------------------------- DIV -------------------------------------- 5460 5461 // Floats vector div 5462 instruct vdivF(vec dst, vec src) %{ 5463 predicate(UseAVX == 0); 5464 match(Set dst (DivVF dst src)); 5465 format %{ "divps $dst,$src\t! div packedF" %} 5466 ins_encode %{ 5467 __ divps($dst$$XMMRegister, $src$$XMMRegister); 5468 %} 5469 ins_pipe( pipe_slow ); 5470 %} 5471 5472 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 5473 predicate(UseAVX > 0); 5474 match(Set dst (DivVF src1 src2)); 5475 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 5476 ins_encode %{ 5477 int vlen_enc = vector_length_encoding(this); 5478 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5479 %} 5480 ins_pipe( pipe_slow ); 5481 %} 5482 5483 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 5484 predicate(UseAVX > 0); 5485 match(Set dst (DivVF src (LoadVector mem))); 5486 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 5487 ins_encode %{ 5488 int vlen_enc = vector_length_encoding(this); 5489 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5490 %} 5491 ins_pipe( pipe_slow ); 5492 %} 5493 5494 // Doubles vector div 5495 instruct vdivD(vec dst, vec src) %{ 5496 predicate(UseAVX == 0); 5497 match(Set dst (DivVD dst src)); 5498 format %{ "divpd $dst,$src\t! 
div packedD" %} 5499 ins_encode %{ 5500 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 5501 %} 5502 ins_pipe( pipe_slow ); 5503 %} 5504 5505 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 5506 predicate(UseAVX > 0); 5507 match(Set dst (DivVD src1 src2)); 5508 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 5509 ins_encode %{ 5510 int vlen_enc = vector_length_encoding(this); 5511 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5512 %} 5513 ins_pipe( pipe_slow ); 5514 %} 5515 5516 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 5517 predicate(UseAVX > 0); 5518 match(Set dst (DivVD src (LoadVector mem))); 5519 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 5520 ins_encode %{ 5521 int vlen_enc = vector_length_encoding(this); 5522 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5523 %} 5524 ins_pipe( pipe_slow ); 5525 %} 5526 5527 // ------------------------------ MinMax --------------------------------------- 5528 5529 // Byte, Short, Int vector Min/Max 5530 instruct minmax_reg_sse(vec dst, vec src) %{ 5531 predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5532 UseAVX == 0); 5533 match(Set dst (MinV dst src)); 5534 match(Set dst (MaxV dst src)); 5535 format %{ "vector_minmax $dst,$src\t! " %} 5536 ins_encode %{ 5537 assert(UseSSE >= 4, "required"); 5538 5539 int opcode = this->ideal_Opcode(); 5540 BasicType elem_bt = vector_element_basic_type(this); 5541 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 5542 %} 5543 ins_pipe( pipe_slow ); 5544 %} 5545 5546 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 5547 predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5548 UseAVX > 0); 5549 match(Set dst (MinV src1 src2)); 5550 match(Set dst (MaxV src1 src2)); 5551 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 5552 ins_encode %{ 5553 int opcode = this->ideal_Opcode(); 5554 int vlen_enc = vector_length_encoding(this); 5555 BasicType elem_bt = vector_element_basic_type(this); 5556 5557 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5558 %} 5559 ins_pipe( pipe_slow ); 5560 %} 5561 5562 // Long vector Min/Max 5563 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 5564 predicate(vector_length_in_bytes(n) == 16 && vector_element_basic_type(n) == T_LONG && 5565 UseAVX == 0); 5566 match(Set dst (MinV dst src)); 5567 match(Set dst (MaxV src dst)); 5568 effect(TEMP dst, TEMP tmp); 5569 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 5570 ins_encode %{ 5571 assert(UseSSE >= 4, "required"); 5572 5573 int opcode = this->ideal_Opcode(); 5574 BasicType elem_bt = vector_element_basic_type(this); 5575 assert(elem_bt == T_LONG, "sanity"); 5576 5577 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 5578 %} 5579 ins_pipe( pipe_slow ); 5580 %} 5581 5582 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 5583 predicate(vector_length_in_bytes(n) <= 32 && vector_element_basic_type(n) == T_LONG && 5584 UseAVX > 0 && !VM_Version::supports_avx512vl()); 5585 match(Set dst (MinV src1 src2)); 5586 match(Set dst (MaxV src1 src2)); 5587 effect(TEMP dst); 5588 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 5589 ins_encode %{ 5590 int vlen_enc = vector_length_encoding(this); 5591 int opcode = this->ideal_Opcode(); 5592 BasicType elem_bt = vector_element_basic_type(this); 5593 assert(elem_bt == T_LONG, "sanity"); 5594 5595 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5596 %} 5597 ins_pipe( pipe_slow ); 5598 %} 5599 5600 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 5601 predicate((vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 5602 vector_element_basic_type(n) == T_LONG); 5603 match(Set dst (MinV src1 src2)); 5604 match(Set dst (MaxV src1 src2)); 5605 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 5606 ins_encode %{ 5607 assert(UseAVX > 2, "required"); 5608 5609 int vlen_enc = vector_length_encoding(this); 5610 int opcode = this->ideal_Opcode(); 5611 BasicType elem_bt = vector_element_basic_type(this); 5612 assert(elem_bt == T_LONG, "sanity"); 5613 5614 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5615 %} 5616 ins_pipe( pipe_slow ); 5617 %} 5618 5619 // Float/Double vector Min/Max 5620 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 5621 predicate(vector_length_in_bytes(n) <= 32 && 5622 is_floating_point_type(vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 5623 UseAVX > 0); 5624 match(Set dst (MinV a b)); 5625 match(Set dst (MaxV a b)); 5626 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 5627 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 5628 ins_encode %{ 5629 assert(UseAVX > 0, "required"); 5630 5631 int opcode = this->ideal_Opcode(); 5632 int vlen_enc = vector_length_encoding(this); 5633 BasicType elem_bt = vector_element_basic_type(this); 5634 5635 __ vminmax_fp(opcode, elem_bt, 5636 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 5637 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 5638 %} 5639 ins_pipe( pipe_slow ); 5640 %} 5641 5642 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{ 5643 predicate(vector_length_in_bytes(n) == 64 && 5644 is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 5645 match(Set dst (MinV a b)); 5646 match(Set dst (MaxV a b)); 5647 effect(USE a, USE b, TEMP atmp, TEMP btmp); 5648 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 5649 ins_encode %{ 5650 assert(UseAVX > 2, "required"); 5651 5652 int opcode = this->ideal_Opcode(); 5653 int vlen_enc = vector_length_encoding(this); 5654 BasicType elem_bt = vector_element_basic_type(this); 5655 5656 KRegister ktmp = k1; 5657 __ evminmax_fp(opcode, elem_bt, 5658 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 5659 ktmp, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 5660 %} 5661 ins_pipe( pipe_slow ); 5662 %} 5663 5664 // --------------------------------- Sqrt -------------------------------------- 5665 5666 instruct vsqrtF_reg(vec dst, vec src) %{ 5667 match(Set dst (SqrtVF src)); 5668 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 5669 ins_encode %{ 5670 assert(UseAVX > 0, "required"); 5671 int vlen_enc = vector_length_encoding(this); 5672 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5673 %} 5674 ins_pipe( pipe_slow ); 5675 %} 5676 5677 instruct vsqrtF_mem(vec dst, memory mem) %{ 5678 match(Set dst (SqrtVF (LoadVector mem))); 5679 format %{ "vsqrtps $dst,$mem\t! 
sqrt packedF" %} 5680 ins_encode %{ 5681 assert(UseAVX > 0, "required"); 5682 int vlen_enc = vector_length_encoding(this); 5683 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 5684 %} 5685 ins_pipe( pipe_slow ); 5686 %} 5687 5688 // Floating point vector sqrt 5689 instruct vsqrtD_reg(vec dst, vec src) %{ 5690 match(Set dst (SqrtVD src)); 5691 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 5692 ins_encode %{ 5693 assert(UseAVX > 0, "required"); 5694 int vlen_enc = vector_length_encoding(this); 5695 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5696 %} 5697 ins_pipe( pipe_slow ); 5698 %} 5699 5700 instruct vsqrtD_mem(vec dst, memory mem) %{ 5701 match(Set dst (SqrtVD (LoadVector mem))); 5702 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 5703 ins_encode %{ 5704 assert(UseAVX > 0, "required"); 5705 int vlen_enc = vector_length_encoding(this); 5706 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 5707 %} 5708 ins_pipe( pipe_slow ); 5709 %} 5710 5711 // ------------------------------ Shift --------------------------------------- 5712 5713 // Left and right shift count vectors are the same on x86 5714 // (only lowest bits of xmm reg are used for count). 5715 instruct vshiftcnt(vec dst, rRegI cnt) %{ 5716 match(Set dst (LShiftCntV cnt)); 5717 match(Set dst (RShiftCntV cnt)); 5718 format %{ "movdl $dst,$cnt\t! load shift count" %} 5719 ins_encode %{ 5720 __ movdl($dst$$XMMRegister, $cnt$$Register); 5721 %} 5722 ins_pipe( pipe_slow ); 5723 %} 5724 5725 // Byte vector shift 5726 instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5727 predicate(vector_length(n) <= 8); 5728 match(Set dst ( LShiftVB src shift)); 5729 match(Set dst ( RShiftVB src shift)); 5730 match(Set dst (URShiftVB src shift)); 5731 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 5732 format %{"vector_byte_shift $dst,$src,$shift" %} 5733 ins_encode %{ 5734 assert(UseSSE > 3, "required"); 5735 int opcode = this->ideal_Opcode(); 5736 bool sign = (opcode == Op_URShiftVB) ? false : true; 5737 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 5738 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 5739 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5740 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 5741 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5742 %} 5743 ins_pipe( pipe_slow ); 5744 %} 5745 5746 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 5747 predicate(vector_length(n) == 16 && UseAVX <= 1); 5748 match(Set dst ( LShiftVB src shift)); 5749 match(Set dst ( RShiftVB src shift)); 5750 match(Set dst (URShiftVB src shift)); 5751 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 5752 format %{"vector_byte_shift $dst,$src,$shift" %} 5753 ins_encode %{ 5754 assert(UseSSE > 3, "required"); 5755 int opcode = this->ideal_Opcode(); 5756 bool sign = (opcode == Op_URShiftVB) ? 
false : true; 5757 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 5758 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 5759 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 5760 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 5761 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 5762 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5763 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 5764 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 5765 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 5766 %} 5767 ins_pipe( pipe_slow ); 5768 %} 5769 5770 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5771 predicate(vector_length(n) == 16 && UseAVX > 1); 5772 match(Set dst ( LShiftVB src shift)); 5773 match(Set dst ( RShiftVB src shift)); 5774 match(Set dst (URShiftVB src shift)); 5775 effect(TEMP dst, TEMP tmp, TEMP scratch); 5776 format %{"vector_byte_shift $dst,$src,$shift" %} 5777 ins_encode %{ 5778 int opcode = this->ideal_Opcode(); 5779 bool sign = (opcode == Op_URShiftVB) ? false : true; 5780 int vlen_enc = Assembler::AVX_256bit; 5781 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 5782 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5783 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 5784 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 5785 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 5786 %} 5787 ins_pipe( pipe_slow ); 5788 %} 5789 5790 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5791 predicate(vector_length(n) == 32); 5792 match(Set dst ( LShiftVB src shift)); 5793 match(Set dst ( RShiftVB src shift)); 5794 match(Set dst (URShiftVB src shift)); 5795 effect(TEMP dst, TEMP tmp, TEMP scratch); 5796 format %{"vector_byte_shift $dst,$src,$shift" %} 5797 ins_encode %{ 5798 assert(UseAVX > 1, "required"); 5799 int opcode = this->ideal_Opcode(); 5800 bool sign = (opcode == Op_URShiftVB) ? 
false : true; 5801 int vlen_enc = Assembler::AVX_256bit; 5802 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 5803 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5804 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5805 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5806 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5807 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 5808 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 5809 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5810 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 5811 %} 5812 ins_pipe( pipe_slow ); 5813 %} 5814 5815 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 5816 predicate(vector_length(n) == 64); 5817 match(Set dst ( LShiftVB src shift)); 5818 match(Set dst (RShiftVB src shift)); 5819 match(Set dst (URShiftVB src shift)); 5820 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5821 format %{"vector_byte_shift $dst,$src,$shift" %} 5822 ins_encode %{ 5823 assert(UseAVX > 2, "required"); 5824 int opcode = this->ideal_Opcode(); 5825 bool sign = (opcode == Op_URShiftVB) ? false : true; 5826 int vlen_enc = Assembler::AVX_512bit; 5827 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 5828 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5829 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 5830 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5831 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5832 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5833 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5834 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5835 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5836 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5837 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); 5838 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5839 %} 5840 ins_pipe( pipe_slow ); 5841 %} 5842 5843 // Shorts vector logical right shift produces incorrect Java result 5844 // for negative data because java code convert short value into int with 5845 // sign extension before a shift. But char vectors are fine since chars are 5846 // unsigned values. 5847 // Shorts/Chars vector left shift 5848 instruct vshiftS(vec dst, vec src, vec shift) %{ 5849 match(Set dst ( LShiftVS src shift)); 5850 match(Set dst ( RShiftVS src shift)); 5851 match(Set dst (URShiftVS src shift)); 5852 effect(TEMP dst, USE src, USE shift); 5853 format %{ "vshiftw $dst,$src,$shift\t! 
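// Worked example for the note above on short logical right shifts (Java-style,
// illustration only):
//
//   short s = -1;                  // bit pattern 0xFFFF
//   int r = s >>> 4;               // Java first sign-extends s to 0xFFFFFFFF,
//                                  // so r == 0x0FFFFFFF
//
// A 16-bit lane shift would compute 0xFFFF >>> 4 == 0x0FFF instead, which is
// why the restriction above applies only to short data; for char, no sign
// extension happens and the two results agree.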
shift packedS" %} 5854 ins_encode %{ 5855 int opcode = this->ideal_Opcode(); 5856 if (UseAVX > 0) { 5857 int vlen_enc = vector_length_encoding(this); 5858 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5859 } else { 5860 int vlen = vector_length(this); 5861 if (vlen == 2) { 5862 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 5863 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 5864 } else if (vlen == 4) { 5865 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 5866 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 5867 } else { 5868 assert (vlen == 8, "sanity"); 5869 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5870 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 5871 } 5872 } 5873 %} 5874 ins_pipe( pipe_slow ); 5875 %} 5876 5877 // Integers vector left shift 5878 instruct vshiftI(vec dst, vec src, vec shift) %{ 5879 match(Set dst ( LShiftVI src shift)); 5880 match(Set dst ( RShiftVI src shift)); 5881 match(Set dst (URShiftVI src shift)); 5882 effect(TEMP dst, USE src, USE shift); 5883 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 5884 ins_encode %{ 5885 int opcode = this->ideal_Opcode(); 5886 if (UseAVX > 0) { 5887 int vlen_enc = vector_length_encoding(this); 5888 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5889 } else { 5890 int vlen = vector_length(this); 5891 if (vlen == 2) { 5892 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 5893 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 5894 } else { 5895 assert(vlen == 4, "sanity"); 5896 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5897 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 5898 } 5899 } 5900 %} 5901 ins_pipe( pipe_slow ); 5902 %} 5903 5904 // Longs vector shift 5905 instruct vshiftL(vec dst, vec src, vec shift) %{ 5906 match(Set dst ( LShiftVL src shift)); 5907 match(Set dst (URShiftVL src shift)); 5908 effect(TEMP dst, USE src, USE shift); 5909 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 5910 ins_encode %{ 5911 int opcode = this->ideal_Opcode(); 5912 if (UseAVX > 0) { 5913 int vlen_enc = vector_length_encoding(this); 5914 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5915 } else { 5916 assert(vector_length(this) == 2, ""); 5917 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5918 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 5919 } 5920 %} 5921 ins_pipe( pipe_slow ); 5922 %} 5923 5924 // -------------------ArithmeticRightShift ----------------------------------- 5925 // Long vector arithmetic right shift 5926 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5927 predicate(UseAVX <= 2); 5928 match(Set dst (RShiftVL src shift)); 5929 effect(TEMP dst, TEMP tmp, TEMP scratch); 5930 format %{ "vshiftq $dst,$src,$shift" %} 5931 ins_encode %{ 5932 uint vlen = vector_length(this); 5933 if (vlen == 2) { 5934 assert(UseSSE >= 2, "required"); 5935 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5936 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 5937 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 5938 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 5939 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 5940 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 5941 } else { 5942 assert(vlen == 4, "sanity"); 5943 assert(UseAVX > 1, "required"); 5944 int vlen_enc = Assembler::AVX_256bit; 5945 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5946 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 5947 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5948 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5949 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5950 } 5951 %} 5952 ins_pipe( pipe_slow ); 5953 %} 5954 5955 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 5956 predicate(UseAVX > 2); 5957 match(Set dst (RShiftVL src shift)); 5958 format %{ "vshiftq $dst,$src,$shift" %} 5959 ins_encode %{ 5960 int vlen_enc = vector_length_encoding(this); 5961 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5962 %} 5963 ins_pipe( pipe_slow ); 5964 %} 5965 5966 // ------------------- Variable Shift ----------------------------- 5967 // Byte variable shift 5968 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 5969 predicate(vector_length(n) <= 8 && vector_element_basic_type(n) == T_BYTE && 5970 !VM_Version::supports_avx512bw()); 5971 match(Set dst ( VLShiftV src shift)); 5972 match(Set dst ( VRShiftV src shift)); 5973 match(Set dst (VURShiftV src shift)); 5974 effect(TEMP dst, TEMP vtmp, TEMP scratch); 5975 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp, $scratch as TEMP" %} 5976 ins_encode %{ 5977 assert(UseAVX >= 2, "required"); 5978 5979 int opcode = this->ideal_Opcode(); 5980 int vlen_enc = Assembler::AVX_128bit; 5981 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 5982 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 5983 %} 5984 ins_pipe( pipe_slow ); 5985 %} 5986 5987 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 5988 predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE && 5989 !VM_Version::supports_avx512bw()); 5990 match(Set dst ( VLShiftV src shift)); 5991 match(Set dst ( VRShiftV src shift)); 5992 match(Set dst (VURShiftV src shift)); 5993 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 5994 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} 5995 ins_encode %{ 5996 assert(UseAVX >= 2, "required"); 5997 5998 int opcode = this->ideal_Opcode(); 5999 int vlen_enc = Assembler::AVX_128bit; 6000 // Shift lower half and get word result in dst 6001 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6002 6003 // Shift upper half and get word result in vtmp1 6004 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6005 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6006 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6007 6008 // Merge and down convert the two word results to byte in dst 6009 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6010 %} 6011 ins_pipe( pipe_slow ); 6012 %} 6013 6014 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{ 6015 predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE && 6016 !VM_Version::supports_avx512bw()); 6017 match(Set dst ( VLShiftV src shift)); 6018 match(Set dst ( VRShiftV src shift)); 6019 match(Set dst (VURShiftV src shift)); 6020 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch); 6021 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %} 6022 ins_encode %{ 6023 assert(UseAVX >= 2, "required"); 6024 6025 int opcode = this->ideal_Opcode(); 6026 int vlen_enc = Assembler::AVX_128bit; 6027 // Process lower 128 bits and get result in dst 6028 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6029 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6030 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6031 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6032 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6033 6034 // Process higher 128 bits and get result in vtmp3 6035 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6036 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6037 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register); 6038 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6039 __ vpshufd($vtmp2$$XMMRegister, 
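               // Shuffle immediate 0xE (0b00001110) selects dwords {2,3,0,0},
               // i.e. it moves the upper 64 bits of the 128-bit temp down into
               // the low 64 bits so the following varshiftbw pass can widen and
               // shift the remaining eight byte lanes, mirroring the lower-half
               // pass above.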
$vtmp2$$XMMRegister, 0xE, 0); 6040 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6041 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 6042 6043 // Merge the two results in dst 6044 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6045 %} 6046 ins_pipe( pipe_slow ); 6047 %} 6048 6049 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6050 predicate(vector_length(n) <= 32 && vector_element_basic_type(n) == T_BYTE && 6051 VM_Version::supports_avx512bw()); 6052 match(Set dst ( VLShiftV src shift)); 6053 match(Set dst ( VRShiftV src shift)); 6054 match(Set dst (VURShiftV src shift)); 6055 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6056 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} 6057 ins_encode %{ 6058 assert(UseAVX > 2, "required"); 6059 6060 int opcode = this->ideal_Opcode(); 6061 int vlen_enc = vector_length_encoding(this); 6062 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6063 %} 6064 ins_pipe( pipe_slow ); 6065 %} 6066 6067 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6068 predicate(vector_length(n) == 64 && vector_element_basic_type(n) == T_BYTE && 6069 VM_Version::supports_avx512bw()); 6070 match(Set dst ( VLShiftV src shift)); 6071 match(Set dst ( VRShiftV src shift)); 6072 match(Set dst (VURShiftV src shift)); 6073 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6074 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6075 ins_encode %{ 6076 assert(UseAVX > 2, "required"); 6077 6078 int opcode = this->ideal_Opcode(); 6079 int vlen_enc = Assembler::AVX_256bit; 6080 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6081 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6082 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6083 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6084 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6085 %} 6086 ins_pipe( pipe_slow ); 6087 %} 6088 6089 // Short variable shift 6090 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6091 predicate(vector_length(n) <= 8 && vector_element_basic_type(n) == T_SHORT && 6092 !VM_Version::supports_avx512bw()); 6093 match(Set dst (VLShiftV src shift)); 6094 match(Set dst (VRShiftV src shift)); 6095 match(Set dst (VURShiftV src shift)); 6096 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6097 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6098 ins_encode %{ 6099 assert(UseAVX >= 2, "required"); 6100 6101 int opcode = this->ideal_Opcode(); 6102 bool sign = (opcode == Op_VURShiftV) ? 
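    // Variable shifts of short lanes without AVX-512BW: there is no per-lane
    // 16-bit variable shift, so the shorts are widened to 32-bit lanes
    // (vextendwd for the data, vpmovzxwd for the shift counts), shifted with
    // the per-lane dword shift (varshiftd), masked back to 16 bits with
    // vector_int_to_short_mask() and re-packed with vpackusdw. Per lane,
    // roughly:
    //   int w = sign ? (int)s : (s & 0xFFFF);  // widen element
    //   w = w OP count;                        // per-lane variable shift
    //   short r = (short)(w & 0xFFFF);         // mask + pack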
    false : true;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
  predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VLShiftV src shift));
  match(Set dst (VRShiftV src shift));
  match(Set dst (VURShiftV src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode == Op_VURShiftV) ? false : true;
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2, using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);

    // Shift upper half, with result in dst, using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);

    // Merge lower and upper half results into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VLShiftV src shift));
  match(Set dst (VRShiftV src shift));
  match(Set dst (VURShiftV src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!"
%} 6156 ins_encode %{ 6157 assert(UseAVX > 2, "required"); 6158 6159 int opcode = this->ideal_Opcode(); 6160 int vlen_enc = vector_length_encoding(this); 6161 if (!VM_Version::supports_avx512vl()) { 6162 vlen_enc = Assembler::AVX_512bit; 6163 } 6164 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6165 %} 6166 ins_pipe( pipe_slow ); 6167 %} 6168 6169 //Integer variable shift 6170 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 6171 predicate(vector_element_basic_type(n) == T_INT); 6172 match(Set dst ( VLShiftV src shift)); 6173 match(Set dst ( VRShiftV src shift)); 6174 match(Set dst (VURShiftV src shift)); 6175 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 6176 ins_encode %{ 6177 assert(UseAVX >= 2, "required"); 6178 6179 int opcode = this->ideal_Opcode(); 6180 int vlen_enc = vector_length_encoding(this); 6181 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6182 %} 6183 ins_pipe( pipe_slow ); 6184 %} 6185 6186 //Long variable shift 6187 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 6188 predicate(vector_element_basic_type(n) == T_LONG); 6189 match(Set dst ( VLShiftV src shift)); 6190 match(Set dst (VURShiftV src shift)); 6191 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 6192 ins_encode %{ 6193 assert(UseAVX >= 2, "required"); 6194 6195 int opcode = this->ideal_Opcode(); 6196 int vlen_enc = vector_length_encoding(this); 6197 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6198 %} 6199 ins_pipe( pipe_slow ); 6200 %} 6201 6202 //Long variable right shift arithmetic 6203 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 6204 predicate(vector_length(n) <= 4 && vector_element_basic_type(n) == T_LONG && 6205 UseAVX == 2); 6206 match(Set dst (VRShiftV src shift)); 6207 effect(TEMP dst, TEMP vtmp); 6208 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 6209 ins_encode %{ 6210 int opcode = this->ideal_Opcode(); 6211 int vlen_enc = vector_length_encoding(this); 6212 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 6213 $vtmp$$XMMRegister); 6214 %} 6215 ins_pipe( pipe_slow ); 6216 %} 6217 6218 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 6219 predicate(vector_element_basic_type(n) == T_LONG && 6220 UseAVX > 2); 6221 match(Set dst (VRShiftV src shift)); 6222 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 6223 ins_encode %{ 6224 int opcode = this->ideal_Opcode(); 6225 int vlen_enc = vector_length_encoding(this); 6226 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6227 %} 6228 ins_pipe( pipe_slow ); 6229 %} 6230 6231 // --------------------------------- AND -------------------------------------- 6232 6233 instruct vand(vec dst, vec src) %{ 6234 predicate(UseAVX == 0); 6235 match(Set dst (AndV dst src)); 6236 format %{ "pand $dst,$src\t! and vectors" %} 6237 ins_encode %{ 6238 __ pand($dst$$XMMRegister, $src$$XMMRegister); 6239 %} 6240 ins_pipe( pipe_slow ); 6241 %} 6242 6243 instruct vand_reg(vec dst, vec src1, vec src2) %{ 6244 predicate(UseAVX > 0); 6245 match(Set dst (AndV src1 src2)); 6246 format %{ "vpand $dst,$src1,$src2\t! 
and vectors" %} 6247 ins_encode %{ 6248 int vlen_enc = vector_length_encoding(this); 6249 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6250 %} 6251 ins_pipe( pipe_slow ); 6252 %} 6253 6254 instruct vand_mem(vec dst, vec src, memory mem) %{ 6255 predicate(UseAVX > 0); 6256 match(Set dst (AndV src (LoadVector mem))); 6257 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 6258 ins_encode %{ 6259 int vlen_enc = vector_length_encoding(this); 6260 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6261 %} 6262 ins_pipe( pipe_slow ); 6263 %} 6264 6265 // --------------------------------- OR --------------------------------------- 6266 6267 instruct vor(vec dst, vec src) %{ 6268 predicate(UseAVX == 0); 6269 match(Set dst (OrV dst src)); 6270 format %{ "por $dst,$src\t! or vectors" %} 6271 ins_encode %{ 6272 __ por($dst$$XMMRegister, $src$$XMMRegister); 6273 %} 6274 ins_pipe( pipe_slow ); 6275 %} 6276 6277 instruct vor_reg(vec dst, vec src1, vec src2) %{ 6278 predicate(UseAVX > 0); 6279 match(Set dst (OrV src1 src2)); 6280 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 6281 ins_encode %{ 6282 int vlen_enc = vector_length_encoding(this); 6283 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6284 %} 6285 ins_pipe( pipe_slow ); 6286 %} 6287 6288 instruct vor_mem(vec dst, vec src, memory mem) %{ 6289 predicate(UseAVX > 0); 6290 match(Set dst (OrV src (LoadVector mem))); 6291 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 6292 ins_encode %{ 6293 int vlen_enc = vector_length_encoding(this); 6294 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6295 %} 6296 ins_pipe( pipe_slow ); 6297 %} 6298 6299 // --------------------------------- XOR -------------------------------------- 6300 6301 instruct vxor(vec dst, vec src) %{ 6302 predicate(UseAVX == 0); 6303 match(Set dst (XorV dst src)); 6304 format %{ "pxor $dst,$src\t! xor vectors" %} 6305 ins_encode %{ 6306 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 6307 %} 6308 ins_pipe( pipe_slow ); 6309 %} 6310 6311 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 6312 predicate(UseAVX > 0); 6313 match(Set dst (XorV src1 src2)); 6314 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 6315 ins_encode %{ 6316 int vlen_enc = vector_length_encoding(this); 6317 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6318 %} 6319 ins_pipe( pipe_slow ); 6320 %} 6321 6322 instruct vxor_mem(vec dst, vec src, memory mem) %{ 6323 predicate(UseAVX > 0); 6324 match(Set dst (XorV src (LoadVector mem))); 6325 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 6326 ins_encode %{ 6327 int vlen_enc = vector_length_encoding(this); 6328 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6329 %} 6330 ins_pipe( pipe_slow ); 6331 %} 6332 6333 // --------------------------------- VectorCast -------------------------------------- 6334 6335 instruct vcastBtoX(vec dst, vec src) %{ 6336 match(Set dst (VectorCastB2X src)); 6337 format %{ "vector_cast_b2x $dst,$src\t!" 
%} 6338 ins_encode %{ 6339 assert(UseAVX > 0, "required"); 6340 6341 BasicType to_elem_bt = vector_element_basic_type(this); 6342 int vlen_enc = vector_length_encoding(this); 6343 switch (to_elem_bt) { 6344 case T_SHORT: 6345 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6346 break; 6347 case T_INT: 6348 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6349 break; 6350 case T_FLOAT: 6351 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6352 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6353 break; 6354 case T_LONG: 6355 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6356 break; 6357 case T_DOUBLE: 6358 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6359 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6360 break; 6361 6362 default: assert(false, "%s", type2name(to_elem_bt)); 6363 } 6364 %} 6365 ins_pipe( pipe_slow ); 6366 %} 6367 6368 instruct castStoX(vec dst, vec src, rRegP scratch) %{ 6369 predicate(UseAVX <= 2 && 6370 vector_length(n->in(1)) <= 8 && // src 6371 vector_element_basic_type(n) == T_BYTE); 6372 effect(TEMP scratch); 6373 match(Set dst (VectorCastS2X src)); 6374 format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %} 6375 ins_encode %{ 6376 assert(UseAVX > 0, "required"); 6377 6378 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); 6379 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6380 %} 6381 ins_pipe( pipe_slow ); 6382 %} 6383 6384 instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 6385 predicate(UseAVX <= 2 && 6386 vector_length(n->in(1)) == 16 && // src 6387 vector_element_basic_type(n) == T_BYTE); 6388 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6389 match(Set dst (VectorCastS2X src)); 6390 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp, $scratch as TEMP" %} 6391 ins_encode %{ 6392 assert(UseAVX > 0, "required"); 6393 6394 int vlen_enc = vector_length_encoding(vector_length_in_bytes(this, $src)); 6395 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 6396 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 6397 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6398 %} 6399 ins_pipe( pipe_slow ); 6400 %} 6401 6402 instruct vcastStoX_evex(vec dst, vec src) %{ 6403 predicate(UseAVX > 2 || 6404 (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src 6405 match(Set dst (VectorCastS2X src)); 6406 format %{ "vector_cast_s2x $dst,$src\t!" 
%} 6407 ins_encode %{ 6408 BasicType to_elem_bt = vector_element_basic_type(this); 6409 int src_vlen_enc = vector_length_encoding(this, $src); 6410 int vlen_enc = vector_length_encoding(this); 6411 switch (to_elem_bt) { 6412 case T_BYTE: 6413 if (!VM_Version::supports_avx512vl()) { 6414 vlen_enc = Assembler::AVX_512bit; 6415 } 6416 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6417 break; 6418 case T_INT: 6419 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6420 break; 6421 case T_FLOAT: 6422 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6423 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6424 break; 6425 case T_LONG: 6426 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6427 break; 6428 case T_DOUBLE: 6429 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6430 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6431 break; 6432 default: 6433 ShouldNotReachHere(); 6434 } 6435 %} 6436 ins_pipe( pipe_slow ); 6437 %} 6438 6439 instruct castItoX(vec dst, vec src, rRegP scratch) %{ 6440 predicate(UseAVX <= 2 && 6441 (vector_length_in_bytes(n->in(1)) <= 16) && 6442 (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src 6443 match(Set dst (VectorCastI2X src)); 6444 format %{ "vector_cast_i2x $dst,$src\t! using $scratch as TEMP" %} 6445 effect(TEMP scratch); 6446 ins_encode %{ 6447 assert(UseAVX > 0, "required"); 6448 6449 BasicType to_elem_bt = vector_element_basic_type(this); 6450 int vlen_enc = vector_length_encoding(this, $src); 6451 6452 if (to_elem_bt == T_BYTE) { 6453 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 6454 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6455 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6456 } else { 6457 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 6458 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6459 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6460 } 6461 %} 6462 ins_pipe( pipe_slow ); 6463 %} 6464 6465 instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 6466 predicate(UseAVX <= 2 && 6467 (vector_length_in_bytes(n->in(1)) == 32) && 6468 (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src 6469 match(Set dst (VectorCastI2X src)); 6470 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp and $scratch as TEMP" %} 6471 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6472 ins_encode %{ 6473 assert(UseAVX > 0, "required"); 6474 6475 BasicType to_elem_bt = vector_element_basic_type(this); 6476 int vlen_enc = vector_length_encoding(this, $src); 6477 6478 if (to_elem_bt == T_BYTE) { 6479 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 6480 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 6481 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6482 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6483 } else { 6484 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 6485 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6486 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 6487 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6488 } 6489 %} 6490 ins_pipe( pipe_slow ); 6491 %} 6492 6493 instruct vcastItoX_evex(vec dst, vec src) %{ 6494 predicate(UseAVX > 2 || 6495 (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src 6496 match(Set dst (VectorCastI2X src)); 6497 format %{ "vector_cast_i2x $dst,$src\t!" %} 6498 ins_encode %{ 6499 assert(UseAVX > 0, "required"); 6500 6501 BasicType dst_elem_bt = vector_element_basic_type(this); 6502 int src_vlen_enc = vector_length_encoding(this, $src); 6503 int dst_vlen_enc = vector_length_encoding(this); 6504 switch (dst_elem_bt) { 6505 case T_BYTE: 6506 if (!VM_Version::supports_avx512vl()) { 6507 src_vlen_enc = Assembler::AVX_512bit; 6508 } 6509 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6510 break; 6511 case T_SHORT: 6512 if (!VM_Version::supports_avx512vl()) { 6513 src_vlen_enc = Assembler::AVX_512bit; 6514 } 6515 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6516 break; 6517 case T_FLOAT: 6518 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 6519 break; 6520 case T_LONG: 6521 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 6522 break; 6523 case T_DOUBLE: 6524 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 6525 break; 6526 default: 6527 ShouldNotReachHere(); 6528 } 6529 %} 6530 ins_pipe( pipe_slow ); 6531 %} 6532 6533 instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ 6534 predicate((vector_element_basic_type(n) == T_BYTE || vector_element_basic_type(n) == T_SHORT) && 6535 UseAVX <= 2); 6536 match(Set dst (VectorCastL2X src)); 6537 effect(TEMP scratch); 6538 format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %} 6539 ins_encode %{ 6540 assert(UseAVX > 0, "required"); 6541 6542 int vlen = vector_length_in_bytes(this, $src); 6543 BasicType to_elem_bt = vector_element_basic_type(this); 6544 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
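    // Long -> byte/short narrowing without AVX-512: the low dword of every
    // long lane is first gathered towards the bottom of the vector (vpshufd
    // for <=16-byte sources, vpermilps + vpermpd for 32-byte sources), then
    // masked to the target element width with the mask chosen here and packed
    // down with vpackusdw (plus vpackuswb for the byte case).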
ExternalAddress(vector_int_to_byte_mask()) 6545 : ExternalAddress(vector_int_to_short_mask()); 6546 if (vlen <= 16) { 6547 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 6548 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 6549 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6550 } else { 6551 assert(vlen <= 32, "required"); 6552 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 6553 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 6554 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 6555 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6556 } 6557 if (to_elem_bt == T_BYTE) { 6558 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6559 } 6560 %} 6561 ins_pipe( pipe_slow ); 6562 %} 6563 6564 instruct vcastLtoX_evex(vec dst, vec src) %{ 6565 predicate(UseAVX > 2 || 6566 (vector_element_basic_type(n) == T_INT || 6567 vector_element_basic_type(n) == T_FLOAT || 6568 vector_element_basic_type(n) == T_DOUBLE)); 6569 match(Set dst (VectorCastL2X src)); 6570 format %{ "vector_cast_l2x $dst,$src\t!" %} 6571 ins_encode %{ 6572 BasicType to_elem_bt = vector_element_basic_type(this); 6573 int vlen = vector_length_in_bytes(this, $src); 6574 int vlen_enc = vector_length_encoding(this, $src); 6575 switch (to_elem_bt) { 6576 case T_BYTE: 6577 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 6578 vlen_enc = Assembler::AVX_512bit; 6579 } 6580 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6581 break; 6582 case T_SHORT: 6583 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 6584 vlen_enc = Assembler::AVX_512bit; 6585 } 6586 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6587 break; 6588 case T_INT: 6589 if (vlen == 8) { 6590 if ($dst$$XMMRegister != $src$$XMMRegister) { 6591 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6592 } 6593 } else if (vlen == 16) { 6594 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 6595 } else if (vlen == 32) { 6596 if (UseAVX > 2) { 6597 if (!VM_Version::supports_avx512vl()) { 6598 vlen_enc = Assembler::AVX_512bit; 6599 } 6600 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6601 } else { 6602 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 6603 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 6604 } 6605 } else { // vlen == 64 6606 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6607 } 6608 break; 6609 case T_FLOAT: 6610 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 6611 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6612 break; 6613 case T_DOUBLE: 6614 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 6615 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6616 break; 6617 6618 default: assert(false, "%s", type2name(to_elem_bt)); 6619 } 6620 %} 6621 ins_pipe( pipe_slow ); 6622 %} 6623 6624 instruct vcastFtoD_reg(vec dst, vec src) %{ 6625 predicate(vector_element_basic_type(n) == T_DOUBLE); 6626 match(Set dst (VectorCastF2X src)); 6627 format %{ "vector_cast_f2x $dst,$src\t!" 
%} 6628 ins_encode %{ 6629 int vlen_enc = vector_length_encoding(this); 6630 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6631 %} 6632 ins_pipe( pipe_slow ); 6633 %} 6634 6635 instruct vcastDtoF_reg(vec dst, vec src) %{ 6636 predicate(vector_element_basic_type(n) == T_FLOAT); 6637 match(Set dst (VectorCastD2X src)); 6638 format %{ "vector_cast_d2x $dst,$src\t!" %} 6639 ins_encode %{ 6640 int vlen_enc = vector_length_encoding(this, $src); 6641 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6642 %} 6643 ins_pipe( pipe_slow ); 6644 %} 6645 6646 // --------------------------------- VectorMaskCmp -------------------------------------- 6647 6648 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 6649 predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 6650 vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 6651 is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 6652 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6653 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 6654 ins_encode %{ 6655 int vlen_enc = vector_length_encoding(this, $src1); 6656 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 6657 if (vector_element_basic_type(this, $src1) == T_FLOAT) 6658 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6659 else 6660 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6661 %} 6662 ins_pipe( pipe_slow ); 6663 %} 6664 6665 instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{ 6666 predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 6667 is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 6668 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6669 effect(TEMP scratch); 6670 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6671 ins_encode %{ 6672 int vlen_enc = Assembler::AVX_512bit; 6673 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 6674 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 6675 KRegister mask = k0; // The comparison itself is not being masked. 6676 if (vector_element_basic_type(this, $src1) == T_FLOAT) { 6677 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6678 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 6679 } else { 6680 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6681 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 6682 } 6683 %} 6684 ins_pipe( pipe_slow ); 6685 %} 6686 6687 instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{ 6688 predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 6689 vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 6690 is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 6691 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6692 effect(TEMP scratch); 6693 format %{ "vector_compare $dst,$src1,$src2,$cond\t! 
using $scratch as TEMP" %} 6694 ins_encode %{ 6695 int vlen_enc = vector_length_encoding(this, $src1); 6696 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 6697 Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1)); 6698 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register); 6699 %} 6700 ins_pipe( pipe_slow ); 6701 %} 6702 6703 instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{ 6704 predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 6705 is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 6706 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6707 effect(TEMP scratch); 6708 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6709 ins_encode %{ 6710 assert(UseAVX > 2, "required"); 6711 6712 int vlen_enc = Assembler::AVX_512bit; 6713 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 6714 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 6715 KRegister mask = k0; // The comparison itself is not being masked. 6716 bool merge = false; 6717 BasicType src1_elem_bt = vector_element_basic_type(this, $src1); 6718 6719 switch (src1_elem_bt) { 6720 case T_BYTE: { 6721 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6722 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 6723 break; 6724 } 6725 case T_SHORT: { 6726 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6727 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 6728 break; 6729 } 6730 case T_INT: { 6731 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6732 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 6733 break; 6734 } 6735 case T_LONG: { 6736 __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6737 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 6738 break; 6739 } 6740 6741 default: assert(false, "%s", type2name(src1_elem_bt)); 6742 } 6743 %} 6744 ins_pipe( pipe_slow ); 6745 %} 6746 6747 // Extract 6748 6749 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 6750 predicate(vector_length_in_bytes(n->in(1)) <= 16); // src 6751 match(Set dst (ExtractI src idx)); 6752 match(Set dst (ExtractS src idx)); 6753 #ifdef _LP64 6754 match(Set dst (ExtractB src idx)); 6755 #endif 6756 ins_encode %{ 6757 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6758 6759 BasicType elem_bt = vector_element_basic_type(this, $src); 6760 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 6761 %} 6762 ins_pipe( pipe_slow ); 6763 %} 6764 6765 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 6766 predicate(vector_length_in_bytes(n->in(1)) == 32 || // src 6767 vector_length_in_bytes(n->in(1)) == 64); // src 6768 match(Set dst (ExtractI src idx)); 6769 match(Set dst (ExtractS src idx)); 6770 #ifdef _LP64 6771 match(Set dst (ExtractB src idx)); 6772 #endif 6773 effect(TEMP vtmp); 6774 ins_encode %{ 6775 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6776 6777 BasicType elem_bt = vector_element_basic_type(this, $src); 6778 XMMRegister 
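        // Extracts from 256/512-bit vectors are done in two steps: get_lane
        // isolates the 128-bit lane that holds element $idx (using $vtmp as
        // scratch) and returns the register containing it, then get_elem
        // pulls the requested element out of that 128-bit lane into $dst.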
lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 6779 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 6780 %} 6781 ins_pipe( pipe_slow ); 6782 %} 6783 6784 #ifdef _LP64 6785 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 6786 predicate(vector_length(n->in(1)) <= 2); // src 6787 match(Set dst (ExtractL src idx)); 6788 ins_encode %{ 6789 assert(UseSSE >= 4, "required"); 6790 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6791 6792 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 6793 %} 6794 ins_pipe( pipe_slow ); 6795 %} 6796 6797 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 6798 predicate(vector_length(n->in(1)) == 4 || // src 6799 vector_length(n->in(1)) == 8); // src 6800 match(Set dst (ExtractL src idx)); 6801 effect(TEMP vtmp); 6802 ins_encode %{ 6803 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6804 6805 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 6806 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 6807 %} 6808 ins_pipe( pipe_slow ); 6809 %} 6810 #endif 6811 6812 instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 6813 predicate(vector_length(n->in(1)) <= 4); 6814 match(Set dst (ExtractF src idx)); 6815 effect(TEMP dst, TEMP tmp, TEMP vtmp); 6816 ins_encode %{ 6817 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6818 6819 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); 6820 %} 6821 ins_pipe( pipe_slow ); 6822 %} 6823 6824 instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 6825 predicate(vector_length(n->in(1)/*src*/) == 8 || 6826 vector_length(n->in(1)/*src*/) == 16); 6827 match(Set dst (ExtractF src idx)); 6828 effect(TEMP tmp, TEMP vtmp); 6829 ins_encode %{ 6830 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6831 6832 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 6833 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); 6834 %} 6835 ins_pipe( pipe_slow ); 6836 %} 6837 6838 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 6839 predicate(vector_length(n->in(1)) == 2); // src 6840 match(Set dst (ExtractD src idx)); 6841 ins_encode %{ 6842 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6843 6844 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 6845 %} 6846 ins_pipe( pipe_slow ); 6847 %} 6848 6849 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 6850 predicate(vector_length(n->in(1)) == 4 || // src 6851 vector_length(n->in(1)) == 8); // src 6852 match(Set dst (ExtractD src idx)); 6853 effect(TEMP vtmp); 6854 ins_encode %{ 6855 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6856 6857 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 6858 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 6859 %} 6860 ins_pipe( pipe_slow ); 6861 %} 6862 6863 // --------------------------------- Vector Blend -------------------------------------- 6864 6865 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 6866 predicate(UseAVX == 0); 6867 match(Set dst (VectorBlend (Binary dst src) mask)); 6868 format %{ "vector_blend 
$dst,$src,$mask\t! using $tmp as TEMP" %} 6869 effect(TEMP tmp); 6870 ins_encode %{ 6871 assert(UseSSE >= 4, "required"); 6872 6873 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 6874 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 6875 } 6876 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 6877 %} 6878 ins_pipe( pipe_slow ); 6879 %} 6880 6881 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 6882 predicate(UseAVX > 0 && 6883 vector_length_in_bytes(n) <= 32 && 6884 is_integral_type(vector_element_basic_type(n))); 6885 match(Set dst (VectorBlend (Binary src1 src2) mask)); 6886 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 6887 ins_encode %{ 6888 int vlen_enc = vector_length_encoding(this); 6889 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 6890 %} 6891 ins_pipe( pipe_slow ); 6892 %} 6893 6894 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 6895 predicate(UseAVX > 0 && 6896 vector_length_in_bytes(n) <= 32 && 6897 !is_integral_type(vector_element_basic_type(n))); 6898 match(Set dst (VectorBlend (Binary src1 src2) mask)); 6899 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 6900 ins_encode %{ 6901 int vlen_enc = vector_length_encoding(this); 6902 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 6903 %} 6904 ins_pipe( pipe_slow ); 6905 %} 6906 6907 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch) %{ 6908 predicate(vector_length_in_bytes(n) == 64); 6909 match(Set dst (VectorBlend (Binary src1 src2) mask)); 6910 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} 6911 effect(TEMP scratch); 6912 ins_encode %{ 6913 int vlen_enc = Assembler::AVX_512bit; 6914 BasicType elem_bt = vector_element_basic_type(this); 6915 KRegister ktmp = k2; 6916 __ evpcmp(elem_bt, ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); 6917 __ evpblend(elem_bt, $dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6918 %} 6919 ins_pipe( pipe_slow ); 6920 %} 6921 6922 // --------------------------------- ABS -------------------------------------- 6923 // a = |a| 6924 instruct vabsB_reg(vec dst, vec src) %{ 6925 match(Set dst (AbsVB src)); 6926 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 6927 ins_encode %{ 6928 uint vlen = vector_length(this); 6929 if (vlen <= 16) { 6930 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 6931 } else { 6932 int vlen_enc = vector_length_encoding(this); 6933 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6934 } 6935 %} 6936 ins_pipe( pipe_slow ); 6937 %} 6938 6939 instruct vabsS_reg(vec dst, vec src) %{ 6940 match(Set dst (AbsVS src)); 6941 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 6942 ins_encode %{ 6943 uint vlen = vector_length(this); 6944 if (vlen <= 8) { 6945 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 6946 } else { 6947 int vlen_enc = vector_length_encoding(this); 6948 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6949 } 6950 %} 6951 ins_pipe( pipe_slow ); 6952 %} 6953 6954 instruct vabsI_reg(vec dst, vec src) %{ 6955 match(Set dst (AbsVI src)); 6956 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 6957 ins_encode %{ 6958 uint vlen = vector_length(this); 6959 if (vlen <= 4) { 6960 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 6961 } else { 6962 int vlen_enc = 
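      // Same pattern as vabsB/vabsS above: vectors that fit in a single
      // 128-bit register use the legacy SSSE3 pabs* encoding, anything wider
      // takes the AVX form with an explicit vector-length encoding.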
vector_length_encoding(this); 6963 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6964 } 6965 %} 6966 ins_pipe( pipe_slow ); 6967 %} 6968 6969 instruct vabsL_reg(vec dst, vec src) %{ 6970 match(Set dst (AbsVL src)); 6971 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 6972 ins_encode %{ 6973 assert(UseAVX > 2, "required"); 6974 int vlen_enc = vector_length_encoding(this); 6975 if (!VM_Version::supports_avx512vl()) { 6976 vlen_enc = Assembler::AVX_512bit; 6977 } 6978 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6979 %} 6980 ins_pipe( pipe_slow ); 6981 %} 6982 6983 // --------------------------------- ABSNEG -------------------------------------- 6984 6985 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 6986 predicate(vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 6987 match(Set dst (AbsVF src)); 6988 match(Set dst (NegVF src)); 6989 effect(TEMP scratch); 6990 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 6991 ins_cost(150); 6992 ins_encode %{ 6993 int opcode = this->ideal_Opcode(); 6994 int vlen = vector_length(this); 6995 if (vlen == 2) { 6996 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 6997 } else { 6998 assert(vlen == 8 || vlen == 16, "required"); 6999 int vlen_enc = vector_length_encoding(this); 7000 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7001 } 7002 %} 7003 ins_pipe( pipe_slow ); 7004 %} 7005 7006 instruct vabsneg4F(vec dst, rRegI scratch) %{ 7007 predicate(vector_length(n) == 4); 7008 match(Set dst (AbsVF dst)); 7009 match(Set dst (NegVF dst)); 7010 effect(TEMP scratch); 7011 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 7012 ins_cost(150); 7013 ins_encode %{ 7014 int opcode = this->ideal_Opcode(); 7015 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 7016 %} 7017 ins_pipe( pipe_slow ); 7018 %} 7019 7020 instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 7021 match(Set dst (AbsVD src)); 7022 match(Set dst (NegVD src)); 7023 effect(TEMP scratch); 7024 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 7025 ins_encode %{ 7026 int opcode = this->ideal_Opcode(); 7027 uint vlen = vector_length(this); 7028 if (vlen == 2) { 7029 assert(UseSSE >= 2, "required"); 7030 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7031 } else { 7032 int vlen_enc = vector_length_encoding(this); 7033 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7034 } 7035 %} 7036 ins_pipe( pipe_slow ); 7037 %} 7038 7039 //------------------------------------- NOT -------------------------------------------- 7040 7041 instruct vnotB(vec dst, vec src) %{ 7042 predicate(UseAVX == 0); 7043 match(Set dst (NotV src)); 7044 effect(TEMP dst); 7045 format %{ "vector_not $dst,$src\t!" 
%} 7046 ins_encode %{ 7047 int vlen = vector_length_in_bytes(this); 7048 switch(vlen) { 7049 default: 7050 assert(0, "Incorrect vector length"); 7051 break; 7052 case 4: { 7053 __ movdl($dst$$XMMRegister, ExternalAddress(vector_all_bits_set())); 7054 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7055 } break; 7056 case 8: { 7057 __ movq($dst$$XMMRegister, ExternalAddress(vector_all_bits_set())); 7058 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7059 } break; 7060 case 16: { 7061 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_all_bits_set())); 7062 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7063 } break; 7064 } 7065 %} 7066 ins_pipe( pipe_slow ); 7067 %} 7068 7069 instruct vnotB_reg(vec dst, vec src, rRegP scratch) %{ 7070 predicate(UseAVX > 0); 7071 match(Set dst (NotV src)); 7072 effect(TEMP scratch); 7073 format %{ "vector_not $dst,$src\t! using $scratch as rRegP" %} 7074 ins_encode %{ 7075 int vlen_enc = vector_length_encoding(this); 7076 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vlen_enc, $scratch$$Register); 7077 %} 7078 ins_pipe( pipe_slow ); 7079 %} 7080 7081 //------------------------------------- VectorTest -------------------------------------------- 7082 7083 #ifdef _LP64 7084 instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7085 predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7086 match(Set dst (VectorTest src1 src2 )); 7087 effect(KILL cr); 7088 format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %} 7089 ins_encode %{ 7090 int vlen = vector_length_in_bytes(this, $src1); 7091 int vlen_enc = vector_length_encoding(vlen); 7092 if (vlen <= 32) { 7093 if (UseAVX == 0) { 7094 assert(vlen <= 16, "required"); 7095 __ ptest($src1$$XMMRegister, $src2$$XMMRegister); 7096 } else { 7097 __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7098 } 7099 } else { 7100 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 7101 __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7102 __ kortestql(ktmp, ktmp); 7103 } 7104 __ setb(Assembler::carrySet, $dst$$Register); 7105 __ movzbl($dst$$Register, $dst$$Register); 7106 %} 7107 ins_pipe( pipe_slow ); 7108 %} 7109 7110 instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7111 predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7112 match(Set dst (VectorTest src1 src2 )); 7113 effect(KILL cr); 7114 format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %} 7115 ins_encode %{ 7116 int vlen = vector_length_in_bytes(this, $src1); 7117 int vlen_enc = vector_length_encoding(vlen); 7118 if (vlen <= 32) { 7119 if (UseAVX == 0) { 7120 assert(vlen <= 16, "required"); 7121 __ ptest($src1$$XMMRegister, $src2$$XMMRegister); 7122 } else { 7123 __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7124 } 7125 } else { 7126 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
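      // There is no 512-bit (v)ptest, so wide vectors are compared byte-wise
      // into an opmask register with evpcmpeqb and tested with ktest instead.
      // In the <=256-bit path above, ptest sets ZF only when the bitwise AND
      // of its two inputs is all zero, so the "notZero" setb below means at
      // least one mask lane was set, which is the any-true condition.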
7127 __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7128 __ ktestql(ktmp, ktmp); 7129 } 7130 __ setb(Assembler::notZero, $dst$$Register); 7131 __ movzbl($dst$$Register, $dst$$Register); 7132 %} 7133 ins_pipe( pipe_slow ); 7134 %} 7135 #endif 7136 7137 //------------------------------------- LoadMask -------------------------------------------- 7138 7139 instruct loadMask(vec dst, vec src) %{ 7140 match(Set dst (VectorLoadMask src)); 7141 effect(TEMP dst); 7142 format %{ "vector_loadmask_byte $dst,$src\n\t" %} 7143 ins_encode %{ 7144 int vlen_in_bytes = vector_length_in_bytes(this); 7145 BasicType elem_bt = vector_element_basic_type(this); 7146 7147 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt); 7148 %} 7149 ins_pipe( pipe_slow ); 7150 %} 7151 7152 //------------------------------------- StoreMask -------------------------------------------- 7153 7154 instruct storeMask1B(vec dst, vec src, immI_1 size) %{ 7155 predicate(vector_length(n) < 64 || VM_Version::supports_avx512vlbw()); 7156 match(Set dst (VectorStoreMask src size)); 7157 format %{ "vector_store_mask $dst,$src\t!" %} 7158 ins_encode %{ 7159 assert(UseSSE >= 3, "required"); 7160 if (vector_length_in_bytes(this) <= 16) { 7161 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7162 } else { 7163 assert(UseAVX >= 2, "required"); 7164 int src_vlen_enc = vector_length_encoding(this, $src); 7165 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7166 } 7167 %} 7168 ins_pipe( pipe_slow ); 7169 %} 7170 7171 instruct storeMask2B(vec dst, vec src, immI_2 size) %{ 7172 predicate(vector_length(n) <= 8); 7173 match(Set dst (VectorStoreMask src size)); 7174 format %{ "vector_store_mask $dst,$src\n\t" %} 7175 ins_encode %{ 7176 assert(UseSSE >= 3, "required"); 7177 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7178 __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); 7179 %} 7180 ins_pipe( pipe_slow ); 7181 %} 7182 7183 instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{ 7184 predicate(vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 7185 match(Set dst (VectorStoreMask src size)); 7186 effect(TEMP dst); 7187 format %{ "vector_store_mask $dst,$src\t!" %} 7188 ins_encode %{ 7189 int vlen_enc = Assembler::AVX_128bit; 7190 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 7191 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister,vlen_enc); 7192 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7193 %} 7194 ins_pipe( pipe_slow ); 7195 %} 7196 7197 instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{ 7198 predicate(VM_Version::supports_avx512bw()); 7199 match(Set dst (VectorStoreMask src size)); 7200 format %{ "vector_store_mask $dst,$src\t!" %} 7201 ins_encode %{ 7202 int src_vlen_enc = vector_length_encoding(this, $src); 7203 int dst_vlen_enc = vector_length_encoding(this); 7204 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7205 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 7206 %} 7207 ins_pipe( pipe_slow ); 7208 %} 7209 7210 instruct storeMask4B(vec dst, vec src, immI_4 size) %{ 7211 predicate (vector_length(n) <= 4 && UseAVX <= 2); 7212 match(Set dst (VectorStoreMask src size)); 7213 format %{ "vector_store_mask $dst,$src\t!" 
%} 7214 ins_encode %{ 7215 assert(UseSSE >= 3, "required"); 7216 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7217 __ packssdw($dst$$XMMRegister, $dst$$XMMRegister); 7218 __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); 7219 %} 7220 ins_pipe( pipe_slow ); 7221 %} 7222 7223 instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{ 7224 predicate(vector_length(n) == 8 && UseAVX <= 2); 7225 match(Set dst (VectorStoreMask src size)); 7226 format %{ "vector_store_mask $dst,$src\t!" %} 7227 effect(TEMP dst); 7228 ins_encode %{ 7229 int vlen_enc = Assembler::AVX_128bit; 7230 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 7231 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7232 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7233 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7234 %} 7235 ins_pipe( pipe_slow ); 7236 %} 7237 7238 instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{ 7239 predicate(UseAVX > 2); 7240 match(Set dst (VectorStoreMask src size)); 7241 format %{ "vector_store_mask $dst,$src\t!" %} 7242 ins_encode %{ 7243 int src_vlen_enc = vector_length_encoding(this, $src); 7244 int dst_vlen_enc = vector_length_encoding(this); 7245 if (!VM_Version::supports_avx512vl()) { 7246 src_vlen_enc = Assembler::AVX_512bit; 7247 } 7248 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7249 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 7250 %} 7251 ins_pipe( pipe_slow ); 7252 %} 7253 7254 instruct storeMask8B(vec dst, vec src, immI_8 size) %{ 7255 predicate(vector_length(n) == 2 && UseAVX <= 2); 7256 match(Set dst (VectorStoreMask src size)); 7257 format %{ "vector_store_mask $dst,$src\t!" %} 7258 ins_encode %{ 7259 assert(UseSSE >= 3, "required"); 7260 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 7261 __ packssdw($dst$$XMMRegister, $dst$$XMMRegister); 7262 __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); 7263 __ pabsb($dst$$XMMRegister, $dst$$XMMRegister); 7264 %} 7265 ins_pipe( pipe_slow ); 7266 %} 7267 7268 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, legVec vtmp) %{ 7269 predicate(vector_length(n) == 4 && UseAVX <= 2); 7270 match(Set dst (VectorStoreMask src size)); 7271 format %{ "vector_store_mask $dst,$src\t! using $vtmp as TEMP" %} 7272 effect(TEMP dst, TEMP vtmp); 7273 ins_encode %{ 7274 int vlen_enc = Assembler::AVX_128bit; 7275 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 7276 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7277 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 7278 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7279 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7280 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7281 %} 7282 ins_pipe( pipe_slow ); 7283 %} 7284 7285 instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{ 7286 predicate(UseAVX > 2); 7287 match(Set dst (VectorStoreMask src size)); 7288 format %{ "vector_store_mask $dst,$src\t!" 

instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
  predicate(vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorLoadConst src));
  effect(TEMP scratch);
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte

instruct loadShuffleB(vec dst) %{
  predicate(vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorLoadShuffle dst));
  format %{ "vector_load_shuffle $dst, $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(vec dst, vec src, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    __ vpshufb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
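
// Note: loadIotaIndices above materializes the identity shuffle {0, 1, 2, ...}
// from a constant table, and VectorRearrange then gathers source elements by
// index: roughly dst[i] = src[shuffle[i]]. For byte elements the shuffle
// vector is used directly as the pshufb/vpermb control. A scalar sketch of the
// rearrange semantics (illustrative only; names are made up for this comment):
//
//   static void rearrange_model(signed char* dst, const signed char* src,
//                               const signed char* shuffle, int vlen) {
//     for (int i = 0; i < vlen; i++) {
//       dst[i] = src[shuffle[i]];  // each shuffle[i] is expected to be in [0, vlen)
//     }
//   }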

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms

    // Multiply each shuffle by two to get byte index
    __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
    __ psllw($vtmp$$XMMRegister, 1);

    // Duplicate to create 2 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

    // Add one to get alternate byte index
    __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleS_evex(vec dst, vec src) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
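
// Note: when AVX512BW is not available, loadShuffleS above rewrites a shuffle
// of short indices into an equivalent byte shuffle for pshufb: each short
// index s becomes the byte pair {2*s, 2*s + 1}, selecting the low and high
// byte of the source lane. A scalar sketch of that expansion (illustrative
// only; names are made up for this comment):
//
//   static void expand_short_shuffle_model(unsigned char* byte_shuffle,
//                                          const unsigned char* short_shuffle,
//                                          int num_shorts) {
//     for (int i = 0; i < num_shorts; i++) {
//       byte_shuffle[2 * i]     = (unsigned char)(2 * short_shuffle[i]);      // low byte of lane
//       byte_shuffle[2 * i + 1] = (unsigned char)(2 * short_shuffle[i] + 1);  // high byte of lane
//     }
//   }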

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            vector_length(n) == 4 && UseAVX < 2);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            vector_length(n) == 4 && UseAVX < 2);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleI_avx(vec dst, vec src) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX >= 2);
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX >= 2);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
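
// Note: loadShuffleI above performs the analogous expansion for int/float
// shuffles on pre-AVX2 hardware: each int index s becomes the four byte
// indices {4*s, 4*s + 1, 4*s + 2, 4*s + 3}, one per byte of the source lane.
// A scalar sketch (illustrative only; names are made up for this comment):
//
//   static void expand_int_shuffle_model(unsigned char* byte_shuffle,
//                                        const unsigned char* int_shuffle,
//                                        int num_ints) {
//     for (int i = 0; i < num_ints; i++) {
//       for (int j = 0; j < 4; j++) {
//         byte_shuffle[4 * i + j] = (unsigned char)(4 * int_shuffle[i] + j);
//       }
//     }
//   }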

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleL_evex(vec dst, vec src) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
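
// Note: without AVX512VL there is no variable cross-lane quadword permute, so
// loadShuffleL above rewrites a long/double shuffle into a doubleword shuffle
// for vpermd: each long index s becomes the dword pair {2*s, 2*s + 1}, moving
// each 64-bit lane as two 32-bit halves. A scalar sketch of the expansion
// (illustrative only; names are made up for this comment):
//
//   static void expand_long_shuffle_model(unsigned int* dword_shuffle,
//                                         const unsigned char* long_shuffle,
//                                         int num_longs) {
//     for (int i = 0; i < num_longs; i++) {
//       dword_shuffle[2 * i]     = 2u * long_shuffle[i];      // low half of lane
//       dword_shuffle[2 * i + 1] = 2u * long_shuffle[i] + 1;  // high half of lane
//     }
//   }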

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcountI(vec dst, vec src) %{
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %}
  ins_encode %{
    assert(UsePopCountInstruction, "not enabled");

    int vlen_enc = vector_length_encoding(this);
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
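
// Note: the two multiply-add sections above both reduce adjacent signed short
// pairs into int lanes; pmaddwd/vpmaddwd produce the pairwise products and
// their sum, and the AVX512_VNNI form (evpdpwssd) additionally accumulates
// into the destination, which is why it matches the
// (AddVI (MulAddVS2VI src1 src2) dst) pattern. A scalar sketch of both
// (illustrative only; names are made up for this comment):
//
//   static void mul_add_model(int* dst, const short* src1, const short* src2,
//                             int num_ints, bool accumulate) {
//     for (int i = 0; i < num_ints; i++) {
//       int sum = (int)src1[2 * i]     * (int)src2[2 * i]
//               + (int)src1[2 * i + 1] * (int)src2[2 * i + 1];
//       dst[i] = accumulate ? dst[i] + sum : sum;  // accumulate == true models evpdpwssd
//     }
//   }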