//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
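//
// For illustration only (this mirrors the register classes defined further
// below and adds no definitions of its own): each XMM register is described
// by sixteen 32-bit slots, e.g. XMM0, XMM0b, ..., XMM0p for xmm0. A scalar
// Float occupies slot XMM0, a Double occupies XMM0/XMM0b, a 128-bit vector
// uses XMM0-XMM0d, a 256-bit vector uses XMM0-XMM0h, and a full 512-bit
// vector uses all sixteen slots XMM0-XMM0p.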
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3,
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
XMM5, XMM5b, 863 XMM6, XMM6b, 864 XMM7, XMM7b 865 #ifdef _LP64 866 ,XMM8, XMM8b, 867 XMM9, XMM9b, 868 XMM10, XMM10b, 869 XMM11, XMM11b, 870 XMM12, XMM12b, 871 XMM13, XMM13b, 872 XMM14, XMM14b, 873 XMM15, XMM15b 874 #endif 875 ); 876 877 // Class for all 64bit vector registers 878 reg_class vectord_reg_evex(XMM0, XMM0b, 879 XMM1, XMM1b, 880 XMM2, XMM2b, 881 XMM3, XMM3b, 882 XMM4, XMM4b, 883 XMM5, XMM5b, 884 XMM6, XMM6b, 885 XMM7, XMM7b 886 #ifdef _LP64 887 ,XMM8, XMM8b, 888 XMM9, XMM9b, 889 XMM10, XMM10b, 890 XMM11, XMM11b, 891 XMM12, XMM12b, 892 XMM13, XMM13b, 893 XMM14, XMM14b, 894 XMM15, XMM15b, 895 XMM16, XMM16b, 896 XMM17, XMM17b, 897 XMM18, XMM18b, 898 XMM19, XMM19b, 899 XMM20, XMM20b, 900 XMM21, XMM21b, 901 XMM22, XMM22b, 902 XMM23, XMM23b, 903 XMM24, XMM24b, 904 XMM25, XMM25b, 905 XMM26, XMM26b, 906 XMM27, XMM27b, 907 XMM28, XMM28b, 908 XMM29, XMM29b, 909 XMM30, XMM30b, 910 XMM31, XMM31b 911 #endif 912 ); 913 914 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 915 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 916 917 // Class for all 128bit vector registers 918 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 919 XMM1, XMM1b, XMM1c, XMM1d, 920 XMM2, XMM2b, XMM2c, XMM2d, 921 XMM3, XMM3b, XMM3c, XMM3d, 922 XMM4, XMM4b, XMM4c, XMM4d, 923 XMM5, XMM5b, XMM5c, XMM5d, 924 XMM6, XMM6b, XMM6c, XMM6d, 925 XMM7, XMM7b, XMM7c, XMM7d 926 #ifdef _LP64 927 ,XMM8, XMM8b, XMM8c, XMM8d, 928 XMM9, XMM9b, XMM9c, XMM9d, 929 XMM10, XMM10b, XMM10c, XMM10d, 930 XMM11, XMM11b, XMM11c, XMM11d, 931 XMM12, XMM12b, XMM12c, XMM12d, 932 XMM13, XMM13b, XMM13c, XMM13d, 933 XMM14, XMM14b, XMM14c, XMM14d, 934 XMM15, XMM15b, XMM15c, XMM15d 935 #endif 936 ); 937 938 // Class for all 128bit vector registers 939 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 940 XMM1, XMM1b, XMM1c, XMM1d, 941 XMM2, XMM2b, XMM2c, XMM2d, 942 XMM3, XMM3b, XMM3c, XMM3d, 943 XMM4, XMM4b, XMM4c, XMM4d, 944 XMM5, XMM5b, XMM5c, XMM5d, 945 XMM6, XMM6b, XMM6c, XMM6d, 946 XMM7, XMM7b, XMM7c, XMM7d 947 #ifdef _LP64 948 ,XMM8, XMM8b, XMM8c, XMM8d, 949 XMM9, XMM9b, XMM9c, XMM9d, 950 XMM10, XMM10b, XMM10c, XMM10d, 951 XMM11, XMM11b, XMM11c, XMM11d, 952 XMM12, XMM12b, XMM12c, XMM12d, 953 XMM13, XMM13b, XMM13c, XMM13d, 954 XMM14, XMM14b, XMM14c, XMM14d, 955 XMM15, XMM15b, XMM15c, XMM15d, 956 XMM16, XMM16b, XMM16c, XMM16d, 957 XMM17, XMM17b, XMM17c, XMM17d, 958 XMM18, XMM18b, XMM18c, XMM18d, 959 XMM19, XMM19b, XMM19c, XMM19d, 960 XMM20, XMM20b, XMM20c, XMM20d, 961 XMM21, XMM21b, XMM21c, XMM21d, 962 XMM22, XMM22b, XMM22c, XMM22d, 963 XMM23, XMM23b, XMM23c, XMM23d, 964 XMM24, XMM24b, XMM24c, XMM24d, 965 XMM25, XMM25b, XMM25c, XMM25d, 966 XMM26, XMM26b, XMM26c, XMM26d, 967 XMM27, XMM27b, XMM27c, XMM27d, 968 XMM28, XMM28b, XMM28c, XMM28d, 969 XMM29, XMM29b, XMM29c, XMM29d, 970 XMM30, XMM30b, XMM30c, XMM30d, 971 XMM31, XMM31b, XMM31c, XMM31d 972 #endif 973 ); 974 975 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 976 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 977 978 // Class for all 256bit vector registers 979 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 980 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 981 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 982 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 983 XMM4, XMM4b, 
XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 984 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 985 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 986 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 987 #ifdef _LP64 988 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 989 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 990 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 991 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 992 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 993 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 994 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 995 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 996 #endif 997 ); 998 999 // Class for all 256bit vector registers 1000 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1001 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1002 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1003 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1004 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1005 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1006 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1007 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1008 #ifdef _LP64 1009 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1010 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1011 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1012 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1013 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1014 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1015 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1016 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1017 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1018 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1019 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1020 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1021 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1022 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1023 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1024 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1025 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1026 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1027 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1028 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1029 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1030 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1031 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1032 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1033 #endif 1034 ); 1035 1036 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1037 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1038 1039 // Class for all 512bit vector registers 1040 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1041 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1042 XMM2, XMM2b, 
XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1043 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1044 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1045 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1046 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1047 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1048 #ifdef _LP64 1049 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1057 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, 
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 %} 1102 1103 1104 //----------SOURCE BLOCK------------------------------------------------------- 1105 // This is a block of C++ code which provides values, functions, and 1106 // definitions necessary in the rest of the architecture description 1107 1108 source_hpp %{ 1109 // Header information of the source block. 1110 // Method declarations/definitions which are used outside 1111 // the ad-scope can conveniently be defined here. 1112 // 1113 // To keep related declarations/definitions/uses close together, 1114 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
1115 1116 class NativeJump; 1117 1118 class CallStubImpl { 1119 1120 //-------------------------------------------------------------- 1121 //---< Used for optimization in Compile::shorten_branches >--- 1122 //-------------------------------------------------------------- 1123 1124 public: 1125 // Size of call trampoline stub. 1126 static uint size_call_trampoline() { 1127 return 0; // no call trampolines on this platform 1128 } 1129 1130 // number of relocations needed by a call trampoline stub 1131 static uint reloc_call_trampoline() { 1132 return 0; // no call trampolines on this platform 1133 } 1134 }; 1135 1136 class HandlerImpl { 1137 1138 public: 1139 1140 static int emit_exception_handler(CodeBuffer &cbuf); 1141 static int emit_deopt_handler(CodeBuffer& cbuf); 1142 1143 static uint size_exception_handler() { 1144 // NativeCall instruction size is the same as NativeJump. 1145 // exception handler starts out as jump and can be patched to 1146 // a call be deoptimization. (4932387) 1147 // Note that this value is also credited (in output.cpp) to 1148 // the size of the code section. 1149 return NativeJump::instruction_size; 1150 } 1151 1152 #ifdef _LP64 1153 static uint size_deopt_handler() { 1154 // three 5 byte instructions plus one move for unreachable address. 1155 return 15+3; 1156 } 1157 #else 1158 static uint size_deopt_handler() { 1159 // NativeCall instruction size is the same as NativeJump. 1160 // exception handler starts out as jump and can be patched to 1161 // a call be deoptimization. (4932387) 1162 // Note that this value is also credited (in output.cpp) to 1163 // the size of the code section. 1164 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1165 } 1166 #endif 1167 }; 1168 1169 1170 inline uint vector_length(const Node* n) { 1171 const TypeVect* vt = n->bottom_type()->is_vect(); 1172 return vt->length(); 1173 } 1174 1175 inline uint vector_length(const MachNode* use, MachOper* opnd) { 1176 uint def_idx = use->operand_index(opnd); 1177 Node* def = use->in(def_idx); 1178 return def->bottom_type()->is_vect()->length(); 1179 } 1180 1181 inline uint vector_length_in_bytes(const Node* n) { 1182 const TypeVect* vt = n->bottom_type()->is_vect(); 1183 return vt->length_in_bytes(); 1184 } 1185 1186 inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { 1187 uint def_idx = use->operand_index(opnd); 1188 Node* def = use->in(def_idx); 1189 return def->bottom_type()->is_vect()->length_in_bytes(); 1190 } 1191 1192 inline BasicType vector_element_basic_type(const Node *n) { 1193 return n->bottom_type()->is_vect()->element_basic_type(); 1194 } 1195 1196 inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) { 1197 uint def_idx = use->operand_index(opnd); 1198 Node* def = use->in(def_idx); 1199 return def->bottom_type()->is_vect()->element_basic_type(); 1200 } 1201 1202 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1203 switch(bytes) { 1204 case 4: // fall-through 1205 case 8: // fall-through 1206 case 16: return Assembler::AVX_128bit; 1207 case 32: return Assembler::AVX_256bit; 1208 case 64: return Assembler::AVX_512bit; 1209 1210 default: { 1211 ShouldNotReachHere(); 1212 return Assembler::AVX_NoVec; 1213 } 1214 } 1215 } 1216 1217 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1218 return vector_length_encoding(vector_length_in_bytes(n)); 1219 } 1220 1221 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1222 uint def_idx = 
use->operand_index(opnd); 1223 Node* def = use->in(def_idx); 1224 return vector_length_encoding(def); 1225 } 1226 1227 class Node::PD { 1228 public: 1229 enum NodeFlags { 1230 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1231 _last_flag = Flag_intel_jcc_erratum 1232 }; 1233 }; 1234 1235 %} // end source_hpp 1236 1237 source %{ 1238 1239 #include "opto/addnode.hpp" 1240 #include "c2_intelJccErratum_x86.hpp" 1241 1242 void PhaseOutput::pd_perform_mach_node_analysis() { 1243 if (VM_Version::has_intel_jcc_erratum()) { 1244 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1245 _buf_sizes._code += extra_padding; 1246 } 1247 } 1248 1249 int MachNode::pd_alignment_required() const { 1250 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1251 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1252 return IntelJccErratum::largest_jcc_size() + 1; 1253 } else { 1254 return 1; 1255 } 1256 } 1257 1258 int MachNode::compute_padding(int current_offset) const { 1259 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1260 Compile* C = Compile::current(); 1261 PhaseOutput* output = C->output(); 1262 Block* block = output->block(); 1263 int index = output->index(); 1264 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1265 } else { 1266 return 0; 1267 } 1268 } 1269 1270 // Emit exception handler code. 1271 // Stuff framesize into a register and call a VM stub routine. 1272 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1273 1274 // Note that the code buffer's insts_mark is always relative to insts. 1275 // That's why we must use the macroassembler to generate a handler. 1276 C2_MacroAssembler _masm(&cbuf); 1277 address base = __ start_a_stub(size_exception_handler()); 1278 if (base == NULL) { 1279 ciEnv::current()->record_failure("CodeCache is full"); 1280 return 0; // CodeBuffer::expand failed 1281 } 1282 int offset = __ offset(); 1283 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1284 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1285 __ end_a_stub(); 1286 return offset; 1287 } 1288 1289 // Emit deopt handler code. 1290 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1291 1292 // Note that the code buffer's insts_mark is always relative to insts. 1293 // That's why we must use the macroassembler to generate a handler. 1294 C2_MacroAssembler _masm(&cbuf); 1295 address base = __ start_a_stub(size_deopt_handler()); 1296 if (base == NULL) { 1297 ciEnv::current()->record_failure("CodeCache is full"); 1298 return 0; // CodeBuffer::expand failed 1299 } 1300 int offset = __ offset(); 1301 1302 #ifdef _LP64 1303 address the_pc = (address) __ pc(); 1304 Label next; 1305 // push a "the_pc" on the stack without destroying any registers 1306 // as they all may be live. 
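  // (The call to the "next" label below pushes the address of "next" as its return address;
  //  the subptr then subtracts the number of bytes emitted since "the_pc", so the pushed
  //  stack slot ends up holding "the_pc" itself.)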
1307 1308 // push address of "next" 1309 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1310 __ bind(next); 1311 // adjust it so it matches "the_pc" 1312 __ subptr(Address(rsp, 0), __ offset() - offset); 1313 #else 1314 InternalAddress here(__ pc()); 1315 __ pushptr(here.addr()); 1316 #endif 1317 1318 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1319 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1320 __ end_a_stub(); 1321 return offset; 1322 } 1323 1324 Assembler::Width widthForType(BasicType bt) { 1325 if (bt == T_BYTE) { 1326 return Assembler::B; 1327 } else if (bt == T_SHORT) { 1328 return Assembler::W; 1329 } else if (bt == T_INT) { 1330 return Assembler::D; 1331 } else { 1332 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1333 return Assembler::Q; 1334 } 1335 } 1336 1337 //============================================================================= 1338 1339 // Float masks come from different places depending on platform. 1340 #ifdef _LP64 1341 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1342 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1343 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1344 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1345 #else 1346 static address float_signmask() { return (address)float_signmask_pool; } 1347 static address float_signflip() { return (address)float_signflip_pool; } 1348 static address double_signmask() { return (address)double_signmask_pool; } 1349 static address double_signflip() { return (address)double_signflip_pool; } 1350 #endif 1351 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1352 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1353 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1354 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1355 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1356 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1357 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1358 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1359 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1360 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1361 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1362 1363 //============================================================================= 1364 const bool Matcher::match_rule_supported(int opcode) { 1365 if (!has_match_rule(opcode)) { 1366 return false; // no match rule present 1367 } 1368 switch (opcode) { 1369 case Op_AbsVL: 1370 case Op_StoreVectorScatter: 1371 if (UseAVX < 3) { 1372 return false; 1373 } 1374 break; 1375 case Op_PopCountI: 1376 case Op_PopCountL: 1377 if (!UsePopCountInstruction) { 1378 return false; 1379 } 1380 break; 1381 case Op_PopCountVI: 1382 if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) { 1383 return false; 1384 } 1385 break; 1386 case Op_MulVI: 1387 if ((UseSSE < 4) && (UseAVX < 
1)) { // only with SSE4_1 or AVX 1388 return false; 1389 } 1390 break; 1391 case Op_MulVL: 1392 if (UseSSE < 4) { // only with SSE4_1 or AVX 1393 return false; 1394 } 1395 break; 1396 case Op_MulReductionVL: 1397 if (VM_Version::supports_avx512dq() == false) { 1398 return false; 1399 } 1400 break; 1401 case Op_AddReductionVL: 1402 if (UseSSE < 2) { // requires at least SSE2 1403 return false; 1404 } 1405 break; 1406 case Op_AbsVB: 1407 case Op_AbsVS: 1408 case Op_AbsVI: 1409 case Op_AddReductionVI: 1410 case Op_AndReductionV: 1411 case Op_OrReductionV: 1412 case Op_XorReductionV: 1413 if (UseSSE < 3) { // requires at least SSSE3 1414 return false; 1415 } 1416 break; 1417 case Op_VectorLoadShuffle: 1418 case Op_VectorRearrange: 1419 case Op_MulReductionVI: 1420 if (UseSSE < 4) { // requires at least SSE4 1421 return false; 1422 } 1423 break; 1424 case Op_SqrtVD: 1425 case Op_SqrtVF: 1426 case Op_VectorMaskCmp: 1427 case Op_VectorCastB2X: 1428 case Op_VectorCastS2X: 1429 case Op_VectorCastI2X: 1430 case Op_VectorCastL2X: 1431 case Op_VectorCastF2X: 1432 case Op_VectorCastD2X: 1433 if (UseAVX < 1) { // enabled for AVX only 1434 return false; 1435 } 1436 break; 1437 case Op_CompareAndSwapL: 1438 #ifdef _LP64 1439 case Op_CompareAndSwapP: 1440 #endif 1441 if (!VM_Version::supports_cx8()) { 1442 return false; 1443 } 1444 break; 1445 case Op_CMoveVF: 1446 case Op_CMoveVD: 1447 if (UseAVX < 1) { // enabled for AVX only 1448 return false; 1449 } 1450 break; 1451 case Op_StrIndexOf: 1452 if (!UseSSE42Intrinsics) { 1453 return false; 1454 } 1455 break; 1456 case Op_StrIndexOfChar: 1457 if (!UseSSE42Intrinsics) { 1458 return false; 1459 } 1460 break; 1461 case Op_OnSpinWait: 1462 if (VM_Version::supports_on_spin_wait() == false) { 1463 return false; 1464 } 1465 break; 1466 case Op_MulVB: 1467 case Op_LShiftVB: 1468 case Op_RShiftVB: 1469 case Op_URShiftVB: 1470 case Op_VectorInsert: 1471 case Op_VectorLoadMask: 1472 case Op_VectorStoreMask: 1473 case Op_VectorBlend: 1474 if (UseSSE < 4) { 1475 return false; 1476 } 1477 break; 1478 #ifdef _LP64 1479 case Op_MaxD: 1480 case Op_MaxF: 1481 case Op_MinD: 1482 case Op_MinF: 1483 if (UseAVX < 1) { // enabled for AVX only 1484 return false; 1485 } 1486 break; 1487 #endif 1488 case Op_CacheWB: 1489 case Op_CacheWBPreSync: 1490 case Op_CacheWBPostSync: 1491 if (!VM_Version::supports_data_cache_line_flush()) { 1492 return false; 1493 } 1494 break; 1495 case Op_ExtractB: 1496 case Op_ExtractL: 1497 case Op_ExtractI: 1498 case Op_RoundDoubleMode: 1499 if (UseSSE < 4) { 1500 return false; 1501 } 1502 break; 1503 case Op_RoundDoubleModeV: 1504 if (VM_Version::supports_avx() == false) { 1505 return false; // 128bit vroundpd is not available 1506 } 1507 break; 1508 case Op_LoadVectorGather: 1509 if (UseAVX < 2) { 1510 return false; 1511 } 1512 break; 1513 case Op_FmaVD: 1514 case Op_FmaVF: 1515 if (!UseFMA) { 1516 return false; 1517 } 1518 break; 1519 case Op_MacroLogicV: 1520 if (UseAVX < 3 || !UseVectorMacroLogic) { 1521 return false; 1522 } 1523 break; 1524 #ifndef _LP64 1525 case Op_AddReductionVF: 1526 case Op_AddReductionVD: 1527 case Op_MulReductionVF: 1528 case Op_MulReductionVD: 1529 if (UseSSE < 1) { // requires at least SSE 1530 return false; 1531 } 1532 break; 1533 case Op_MulAddVS2VI: 1534 case Op_RShiftVL: 1535 case Op_AbsVD: 1536 case Op_NegVD: 1537 if (UseSSE < 2) { 1538 return false; 1539 } 1540 break; 1541 #endif // !LP64 1542 } 1543 return true; // Match rules are supported by default. 
1544 } 1545 1546 //------------------------------------------------------------------------ 1547 1548 // Identify extra cases that we might want to provide match rules for vector nodes and 1549 // other intrinsics guarded with vector length (vlen) and element type (bt). 1550 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1551 if (!match_rule_supported(opcode)) { 1552 return false; 1553 } 1554 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1555 // * SSE2 supports 128bit vectors for all types; 1556 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1557 // * AVX2 supports 256bit vectors for all types; 1558 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1559 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1560 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1561 // And MaxVectorSize is taken into account as well. 1562 if (!vector_size_supported(bt, vlen)) { 1563 return false; 1564 } 1565 // Special cases which require vector length follow: 1566 // * implementation limitations 1567 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1568 // * 128bit vroundpd instruction is present only in AVX1 1569 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1570 switch (opcode) { 1571 case Op_AbsVF: 1572 case Op_NegVF: 1573 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1574 return false; // 512bit vandps and vxorps are not available 1575 } 1576 break; 1577 case Op_AbsVD: 1578 case Op_NegVD: 1579 case Op_MulVL: 1580 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1581 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1582 } 1583 break; 1584 case Op_CMoveVF: 1585 if (vlen != 8) { 1586 return false; // implementation limitation (only vcmov8F_reg is present) 1587 } 1588 break; 1589 case Op_MacroLogicV: 1590 if (!VM_Version::supports_evex() || 1591 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1592 return false; 1593 } 1594 break; 1595 case Op_CMoveVD: 1596 if (vlen != 4) { 1597 return false; // implementation limitation (only vcmov4D_reg is present) 1598 } 1599 break; 1600 case Op_MaxV: 1601 case Op_MinV: 1602 if (UseSSE < 4 && is_integral_type(bt)) { 1603 return false; 1604 } 1605 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1606 // Float/Double intrinsics are enabled for AVX family currently. 
1607 if (UseAVX == 0) { 1608 return false; 1609 } 1610 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1611 return false; 1612 } 1613 } 1614 break; 1615 case Op_AddReductionVI: 1616 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1617 return false; 1618 } 1619 // fallthrough 1620 case Op_AndReductionV: 1621 case Op_OrReductionV: 1622 case Op_XorReductionV: 1623 if (is_subword_type(bt) && (UseSSE < 4)) { 1624 return false; 1625 } 1626 #ifndef _LP64 1627 if (bt == T_BYTE || bt == T_LONG) { 1628 return false; 1629 } 1630 #endif 1631 break; 1632 #ifndef _LP64 1633 case Op_VectorInsert: 1634 if (bt == T_LONG || bt == T_DOUBLE) { 1635 return false; 1636 } 1637 break; 1638 #endif 1639 case Op_MinReductionV: 1640 case Op_MaxReductionV: 1641 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1642 return false; 1643 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1644 return false; 1645 } 1646 // Float/Double intrinsics enabled for AVX family. 1647 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1648 return false; 1649 } 1650 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1651 return false; 1652 } 1653 #ifndef _LP64 1654 if (bt == T_BYTE || bt == T_LONG) { 1655 return false; 1656 } 1657 #endif 1658 break; 1659 case Op_VectorTest: 1660 if (UseSSE < 4) { 1661 return false; // Implementation limitation 1662 } else if (size_in_bits < 128) { 1663 return false; // Implementation limitation 1664 } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) { 1665 return false; // Implementation limitation 1666 } 1667 break; 1668 case Op_VectorLoadShuffle: 1669 case Op_VectorRearrange: 1670 if(vlen == 2) { 1671 return false; // Implementation limitation due to how shuffle is loaded 1672 } else if (size_in_bits == 256 && UseAVX < 2) { 1673 return false; // Implementation limitation 1674 } else if (bt == T_BYTE && size_in_bits >= 256 && !VM_Version::supports_avx512_vbmi()) { 1675 return false; // Implementation limitation 1676 } else if (bt == T_SHORT && size_in_bits >= 256 && !VM_Version::supports_avx512bw()) { 1677 return false; // Implementation limitation 1678 } 1679 break; 1680 case Op_VectorLoadMask: 1681 if (size_in_bits == 256 && UseAVX < 2) { 1682 return false; // Implementation limitation 1683 } 1684 // fallthrough 1685 case Op_VectorStoreMask: 1686 if (vlen == 2) { 1687 return false; // Implementation limitation 1688 } 1689 break; 1690 case Op_VectorCastB2X: 1691 if (size_in_bits == 256 && UseAVX < 2) { 1692 return false; // Implementation limitation 1693 } 1694 break; 1695 case Op_VectorCastS2X: 1696 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1697 return false; 1698 } 1699 break; 1700 case Op_VectorCastI2X: 1701 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1702 return false; 1703 } 1704 break; 1705 case Op_VectorCastL2X: 1706 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1707 return false; 1708 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1709 return false; 1710 } 1711 break; 1712 case Op_VectorCastF2X: 1713 case Op_VectorCastD2X: 1714 if (is_integral_type(bt)) { 1715 // Casts from FP to integral types require special fixup logic not easily 1716 // implementable with vectors. 
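        // (For illustration: Java requires (int)NaN == 0 and saturation to MIN_VALUE/MAX_VALUE
        //  for out-of-range values, whereas cvttps2dq/cvttpd2dq produce the "integer indefinite"
        //  pattern 0x80000000 for such inputs, so every lane would need extra fixup code.)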
1717 return false; // Implementation limitation 1718 } 1719 case Op_MulReductionVI: 1720 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1721 return false; 1722 } 1723 break; 1724 case Op_StoreVectorScatter: 1725 if(bt == T_BYTE || bt == T_SHORT) { 1726 return false; 1727 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1728 return false; 1729 } 1730 // fallthrough 1731 case Op_LoadVectorGather: 1732 if (size_in_bits == 64 ) { 1733 return false; 1734 } 1735 break; 1736 } 1737 return true; // Per default match rules are supported. 1738 } 1739 1740 // x86 supports generic vector operands: vec and legVec. 1741 const bool Matcher::supports_generic_vector_operands = true; 1742 1743 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 1744 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 1745 bool legacy = (generic_opnd->opcode() == LEGVEC); 1746 if (!VM_Version::supports_avx512vlbwdq() && // KNL 1747 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 1748 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 1749 return new legVecZOper(); 1750 } 1751 if (legacy) { 1752 switch (ideal_reg) { 1753 case Op_VecS: return new legVecSOper(); 1754 case Op_VecD: return new legVecDOper(); 1755 case Op_VecX: return new legVecXOper(); 1756 case Op_VecY: return new legVecYOper(); 1757 case Op_VecZ: return new legVecZOper(); 1758 } 1759 } else { 1760 switch (ideal_reg) { 1761 case Op_VecS: return new vecSOper(); 1762 case Op_VecD: return new vecDOper(); 1763 case Op_VecX: return new vecXOper(); 1764 case Op_VecY: return new vecYOper(); 1765 case Op_VecZ: return new vecZOper(); 1766 } 1767 } 1768 ShouldNotReachHere(); 1769 return NULL; 1770 } 1771 1772 bool Matcher::is_generic_reg2reg_move(MachNode* m) { 1773 switch (m->rule()) { 1774 case MoveVec2Leg_rule: 1775 case MoveLeg2Vec_rule: 1776 return true; 1777 default: 1778 return false; 1779 } 1780 } 1781 1782 bool Matcher::is_generic_vector(MachOper* opnd) { 1783 switch (opnd->opcode()) { 1784 case VEC: 1785 case LEGVEC: 1786 return true; 1787 default: 1788 return false; 1789 } 1790 } 1791 1792 //------------------------------------------------------------------------ 1793 1794 bool Matcher::supports_vector_variable_shifts(void) { 1795 return (UseAVX >= 2); 1796 } 1797 1798 const bool Matcher::has_predicated_vectors(void) { 1799 bool ret_value = false; 1800 if (UseAVX > 2) { 1801 ret_value = VM_Version::supports_avx512vl(); 1802 } 1803 1804 return ret_value; 1805 } 1806 1807 const int Matcher::float_pressure(int default_pressure_threshold) { 1808 int float_pressure_threshold = default_pressure_threshold; 1809 #ifdef _LP64 1810 if (UseAVX > 2) { 1811 // Increase pressure threshold on machines with AVX3 which have 1812 // 2x more XMM registers. 1813 float_pressure_threshold = default_pressure_threshold * 2; 1814 } 1815 #endif 1816 return float_pressure_threshold; 1817 } 1818 1819 // Max vector size in bytes. 0 if not supported. 1820 const int Matcher::vector_width_in_bytes(BasicType bt) { 1821 assert(is_java_primitive(bt), "only primitive type vectors"); 1822 if (UseSSE < 2) return 0; 1823 // SSE2 supports 128bit vectors for all types. 1824 // AVX2 supports 256bit vectors for all types. 1825 // AVX2/EVEX supports 512bit vectors for all types. 1826 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1827 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 
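  // For example: UseAVX == 1 keeps the 16-byte base size and is widened to 32 below only for
  // T_FLOAT/T_DOUBLE; UseAVX == 2 gives (1 << 2) * 8 = 32 bytes for all types; UseAVX == 3
  // gives 64 bytes, reduced back to 32 for byte/short/char unless AVX512BW is available.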
1828 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1829 size = (UseAVX > 2) ? 64 : 32; 1830 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 1831 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 1832 // Use flag to limit vector size. 1833 size = MIN2(size,(int)MaxVectorSize); 1834 // Minimum 2 values in vector (or 4 for bytes). 1835 switch (bt) { 1836 case T_DOUBLE: 1837 case T_LONG: 1838 if (size < 16) return 0; 1839 break; 1840 case T_FLOAT: 1841 case T_INT: 1842 if (size < 8) return 0; 1843 break; 1844 case T_BOOLEAN: 1845 if (size < 4) return 0; 1846 break; 1847 case T_CHAR: 1848 if (size < 4) return 0; 1849 break; 1850 case T_BYTE: 1851 if (size < 4) return 0; 1852 break; 1853 case T_SHORT: 1854 if (size < 4) return 0; 1855 break; 1856 default: 1857 ShouldNotReachHere(); 1858 } 1859 return size; 1860 } 1861 1862 // Limits on vector size (number of elements) loaded into vector. 1863 const int Matcher::max_vector_size(const BasicType bt) { 1864 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1865 } 1866 const int Matcher::min_vector_size(const BasicType bt) { 1867 int max_size = max_vector_size(bt); 1868 // Min size which can be loaded into vector is 4 bytes. 1869 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1870 return MIN2(size,max_size); 1871 } 1872 1873 // Vector ideal reg corresponding to specified size in bytes 1874 const uint Matcher::vector_ideal_reg(int size) { 1875 assert(MaxVectorSize >= size, ""); 1876 switch(size) { 1877 case 4: return Op_VecS; 1878 case 8: return Op_VecD; 1879 case 16: return Op_VecX; 1880 case 32: return Op_VecY; 1881 case 64: return Op_VecZ; 1882 } 1883 ShouldNotReachHere(); 1884 return 0; 1885 } 1886 1887 // x86 supports misaligned vectors store/load. 1888 const bool Matcher::misaligned_vectors_ok() { 1889 return true; 1890 } 1891 1892 // x86 AES instructions are compatible with SunJCE expanded 1893 // keys, hence we do not need to pass the original key to stubs 1894 const bool Matcher::pass_original_key_for_aes() { 1895 return false; 1896 } 1897 1898 1899 const bool Matcher::convi2l_type_required = true; 1900 1901 // Check for shift by small constant as well 1902 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1903 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1904 shift->in(2)->get_int() <= 3 && 1905 // Are there other uses besides address expressions? 1906 !matcher->is_visited(shift)) { 1907 address_visited.set(shift->_idx); // Flag as address_visited 1908 mstack.push(shift->in(2), Matcher::Visit); 1909 Node *conv = shift->in(1); 1910 #ifdef _LP64 1911 // Allow Matcher to match the rule which bypass 1912 // ConvI2L operation for an array index on LP64 1913 // if the index value is positive. 1914 if (conv->Opcode() == Op_ConvI2L && 1915 conv->as_Type()->type()->is_long()->_lo >= 0 && 1916 // Are there other uses besides address expressions? 1917 !matcher->is_visited(conv)) { 1918 address_visited.set(conv->_idx); // Flag as address_visited 1919 mstack.push(conv->in(1), Matcher::Pre_Visit); 1920 } else 1921 #endif 1922 mstack.push(conv, Matcher::Pre_Visit); 1923 return true; 1924 } 1925 return false; 1926 } 1927 1928 // This function identifies sub-graphs in which a 'load' node is 1929 // input to two different nodes, and such that it can be matched 1930 // with BMI instructions like blsi, blsr, etc. 1931 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 
1932 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 1933 // refers to the same node. 1934 // 1935 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 1936 // This is a temporary solution until we make DAGs expressible in ADL. 1937 template<typename ConType> 1938 class FusedPatternMatcher { 1939 Node* _op1_node; 1940 Node* _mop_node; 1941 int _con_op; 1942 1943 static int match_next(Node* n, int next_op, int next_op_idx) { 1944 if (n->in(1) == NULL || n->in(2) == NULL) { 1945 return -1; 1946 } 1947 1948 if (next_op_idx == -1) { // n is commutative, try rotations 1949 if (n->in(1)->Opcode() == next_op) { 1950 return 1; 1951 } else if (n->in(2)->Opcode() == next_op) { 1952 return 2; 1953 } 1954 } else { 1955 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 1956 if (n->in(next_op_idx)->Opcode() == next_op) { 1957 return next_op_idx; 1958 } 1959 } 1960 return -1; 1961 } 1962 1963 public: 1964 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 1965 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 1966 1967 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 1968 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 1969 typename ConType::NativeType con_value) { 1970 if (_op1_node->Opcode() != op1) { 1971 return false; 1972 } 1973 if (_mop_node->outcnt() > 2) { 1974 return false; 1975 } 1976 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 1977 if (op1_op2_idx == -1) { 1978 return false; 1979 } 1980 // Memory operation must be the other edge 1981 int op1_mop_idx = (op1_op2_idx & 1) + 1; 1982 1983 // Check that the mop node is really what we want 1984 if (_op1_node->in(op1_mop_idx) == _mop_node) { 1985 Node* op2_node = _op1_node->in(op1_op2_idx); 1986 if (op2_node->outcnt() > 1) { 1987 return false; 1988 } 1989 assert(op2_node->Opcode() == op2, "Should be"); 1990 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 1991 if (op2_con_idx == -1) { 1992 return false; 1993 } 1994 // Memory operation must be the other edge 1995 int op2_mop_idx = (op2_con_idx & 1) + 1; 1996 // Check that the memory operation is the same node 1997 if (op2_node->in(op2_mop_idx) == _mop_node) { 1998 // Now check the constant 1999 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2000 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2001 return true; 2002 } 2003 } 2004 } 2005 return false; 2006 } 2007 }; 2008 2009 static bool is_bmi_pattern(Node* n, Node* m) { 2010 assert(UseBMI1Instructions, "sanity"); 2011 if (n != NULL && m != NULL) { 2012 if (m->Opcode() == Op_LoadI) { 2013 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2014 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2015 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2016 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2017 } else if (m->Opcode() == Op_LoadL) { 2018 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2019 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2020 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2021 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2022 } 2023 } 2024 return false; 2025 } 2026 2027 // Should the matcher clone input 'm' of node 'n'? 2028 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2029 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
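  // For example, in (AndI (SubI ConI(0) LoadI) LoadI) the load feeds both the SubI and the
  // AndI; cloning it lets the matcher fold the memory operand directly into blsi instead of
  // first forcing the shared load into a register.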
2030 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2031 mstack.push(m, Visit); 2032 return true; 2033 } 2034 return false; 2035 } 2036 2037 // Should the Matcher clone shifts on addressing modes, expecting them 2038 // to be subsumed into complex addressing expressions or compute them 2039 // into registers? 2040 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2041 Node *off = m->in(AddPNode::Offset); 2042 if (off->is_Con()) { 2043 address_visited.test_set(m->_idx); // Flag as address_visited 2044 Node *adr = m->in(AddPNode::Address); 2045 2046 // Intel can handle 2 adds in addressing mode 2047 // AtomicAdd is not an addressing expression. 2048 // Cheap to find it by looking for screwy base. 2049 if (adr->is_AddP() && 2050 !adr->in(AddPNode::Base)->is_top() && 2051 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2052 // Are there other uses besides address expressions? 2053 !is_visited(adr)) { 2054 address_visited.set(adr->_idx); // Flag as address_visited 2055 Node *shift = adr->in(AddPNode::Offset); 2056 if (!clone_shift(shift, this, mstack, address_visited)) { 2057 mstack.push(shift, Pre_Visit); 2058 } 2059 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2060 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2061 } else { 2062 mstack.push(adr, Pre_Visit); 2063 } 2064 2065 // Clone X+offset as it also folds into most addressing expressions 2066 mstack.push(off, Visit); 2067 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2068 return true; 2069 } else if (clone_shift(off, this, mstack, address_visited)) { 2070 address_visited.test_set(m->_idx); // Flag as address_visited 2071 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2072 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2073 return true; 2074 } 2075 return false; 2076 } 2077 2078 void Compile::reshape_address(AddPNode* addp) { 2079 } 2080 2081 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2082 switch (bt) { 2083 case BoolTest::eq: return Assembler::eq; 2084 case BoolTest::ne: return Assembler::neq; 2085 case BoolTest::le: return Assembler::le; 2086 case BoolTest::ge: return Assembler::nlt; 2087 case BoolTest::lt: return Assembler::lt; 2088 case BoolTest::gt: return Assembler::nle; 2089 default : ShouldNotReachHere(); return Assembler::_false; 2090 } 2091 } 2092 2093 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2094 switch (bt) { 2095 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2096 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2097 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2098 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2099 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2100 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2101 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2102 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2103 } 2104 } 2105 2106 // Helper methods for MachSpillCopyNode::implementation(). 2107 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 2108 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2109 // In 64-bit VM size calculation is very complex. Emitting instructions 2110 // into scratch buffer is used to get size in 64-bit VM. 
2111 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 2112 assert(ireg == Op_VecS || // 32bit vector 2113 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2114 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 2115 "no non-adjacent vector moves" ); 2116 if (cbuf) { 2117 C2_MacroAssembler _masm(cbuf); 2118 int offset = __ offset(); 2119 switch (ireg) { 2120 case Op_VecS: // copy whole register 2121 case Op_VecD: 2122 case Op_VecX: 2123 #ifndef _LP64 2124 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2125 #else 2126 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2127 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2128 } else { 2129 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2130 } 2131 #endif 2132 break; 2133 case Op_VecY: 2134 #ifndef _LP64 2135 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2136 #else 2137 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2138 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2139 } else { 2140 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2141 } 2142 #endif 2143 break; 2144 case Op_VecZ: 2145 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2146 break; 2147 default: 2148 ShouldNotReachHere(); 2149 } 2150 int size = __ offset() - offset; 2151 #ifdef ASSERT 2152 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2153 assert(!do_size || size == 4, "incorrect size calculattion"); 2154 #endif 2155 return size; 2156 #ifndef PRODUCT 2157 } else if (!do_size) { 2158 switch (ireg) { 2159 case Op_VecS: 2160 case Op_VecD: 2161 case Op_VecX: 2162 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2163 break; 2164 case Op_VecY: 2165 case Op_VecZ: 2166 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2167 break; 2168 default: 2169 ShouldNotReachHere(); 2170 } 2171 #endif 2172 } 2173 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 2174 return (UseAVX > 2) ? 6 : 4; 2175 } 2176 2177 int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 2178 int stack_offset, int reg, uint ireg, outputStream* st) { 2179 // In 64-bit VM size calculation is very complex. Emitting instructions 2180 // into scratch buffer is used to get size in 64-bit VM. 
2181 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 2182 if (cbuf) { 2183 C2_MacroAssembler _masm(cbuf); 2184 int offset = __ offset(); 2185 if (is_load) { 2186 switch (ireg) { 2187 case Op_VecS: 2188 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2189 break; 2190 case Op_VecD: 2191 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2192 break; 2193 case Op_VecX: 2194 #ifndef _LP64 2195 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2196 #else 2197 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2198 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2199 } else { 2200 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2201 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2202 } 2203 #endif 2204 break; 2205 case Op_VecY: 2206 #ifndef _LP64 2207 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2208 #else 2209 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2210 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2211 } else { 2212 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2213 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2214 } 2215 #endif 2216 break; 2217 case Op_VecZ: 2218 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2219 break; 2220 default: 2221 ShouldNotReachHere(); 2222 } 2223 } else { // store 2224 switch (ireg) { 2225 case Op_VecS: 2226 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2227 break; 2228 case Op_VecD: 2229 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2230 break; 2231 case Op_VecX: 2232 #ifndef _LP64 2233 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2234 #else 2235 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2236 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2237 } 2238 else { 2239 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2240 } 2241 #endif 2242 break; 2243 case Op_VecY: 2244 #ifndef _LP64 2245 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2246 #else 2247 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2248 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2249 } 2250 else { 2251 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2252 } 2253 #endif 2254 break; 2255 case Op_VecZ: 2256 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2257 break; 2258 default: 2259 ShouldNotReachHere(); 2260 } 2261 } 2262 int size = __ offset() - offset; 2263 #ifdef ASSERT 2264 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 2265 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
2266 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 2267 #endif 2268 return size; 2269 #ifndef PRODUCT 2270 } else if (!do_size) { 2271 if (is_load) { 2272 switch (ireg) { 2273 case Op_VecS: 2274 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2275 break; 2276 case Op_VecD: 2277 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2278 break; 2279 case Op_VecX: 2280 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2281 break; 2282 case Op_VecY: 2283 case Op_VecZ: 2284 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2285 break; 2286 default: 2287 ShouldNotReachHere(); 2288 } 2289 } else { // store 2290 switch (ireg) { 2291 case Op_VecS: 2292 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2293 break; 2294 case Op_VecD: 2295 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2296 break; 2297 case Op_VecX: 2298 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2299 break; 2300 case Op_VecY: 2301 case Op_VecZ: 2302 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2303 break; 2304 default: 2305 ShouldNotReachHere(); 2306 } 2307 } 2308 #endif 2309 } 2310 bool is_single_byte = false; 2311 int vec_len = 0; 2312 if ((UseAVX > 2) && (stack_offset != 0)) { 2313 int tuple_type = Assembler::EVEX_FVM; 2314 int input_size = Assembler::EVEX_32bit; 2315 switch (ireg) { 2316 case Op_VecS: 2317 tuple_type = Assembler::EVEX_T1S; 2318 break; 2319 case Op_VecD: 2320 tuple_type = Assembler::EVEX_T1S; 2321 input_size = Assembler::EVEX_64bit; 2322 break; 2323 case Op_VecX: 2324 break; 2325 case Op_VecY: 2326 vec_len = 1; 2327 break; 2328 case Op_VecZ: 2329 vec_len = 2; 2330 break; 2331 } 2332 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2333 } 2334 int offset_size = 0; 2335 int size = 5; 2336 if (UseAVX > 2 ) { 2337 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2338 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2339 size += 2; // Need an additional two bytes for EVEX encoding 2340 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2341 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2342 } else { 2343 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2344 size += 2; // Need an additional two bytes for EVEX encodding 2345 } 2346 } else { 2347 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2348 } 2349 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2350 return size+offset_size; 2351 } 2352 2353 static inline jint replicate4_imm(int con, int width) { 2354 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2355 assert(width == 1 || width == 2, "only byte or short types here"); 2356 int bit_width = width * 8; 2357 jint val = con; 2358 val &= (1 << bit_width) - 1; // mask off sign bits 2359 while(bit_width < 32) { 2360 val |= (val << bit_width); 2361 bit_width <<= 1; 2362 } 2363 return val; 2364 } 2365 2366 static inline jlong replicate8_imm(int con, int width) { 2367 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 
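  // For example, replicate8_imm(0x34, 1) yields 0x3434343434343434 and
  // replicate8_imm(0x1234, 2) yields 0x1234123412341234 (replicate4_imm above
  // produces the corresponding 32-bit patterns).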
2368 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2369 int bit_width = width * 8; 2370 jlong val = con; 2371 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2372 while(bit_width < 64) { 2373 val |= (val << bit_width); 2374 bit_width <<= 1; 2375 } 2376 return val; 2377 } 2378 2379 #ifndef PRODUCT 2380 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2381 st->print("nop \t# %d bytes pad for loops and calls", _count); 2382 } 2383 #endif 2384 2385 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2386 C2_MacroAssembler _masm(&cbuf); 2387 __ nop(_count); 2388 } 2389 2390 uint MachNopNode::size(PhaseRegAlloc*) const { 2391 return _count; 2392 } 2393 2394 #ifndef PRODUCT 2395 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2396 st->print("# breakpoint"); 2397 } 2398 #endif 2399 2400 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2401 C2_MacroAssembler _masm(&cbuf); 2402 __ int3(); 2403 } 2404 2405 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2406 return MachNode::size(ra_); 2407 } 2408 2409 %} 2410 2411 encode %{ 2412 2413 enc_class call_epilog %{ 2414 if (VerifyStackAtCalls) { 2415 // Check that stack depth is unchanged: find majik cookie on stack 2416 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2417 C2_MacroAssembler _masm(&cbuf); 2418 Label L; 2419 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2420 __ jccb(Assembler::equal, L); 2421 // Die if stack mismatch 2422 __ int3(); 2423 __ bind(L); 2424 } 2425 %} 2426 2427 %} 2428 2429 // Operands for bound floating pointer register arguments 2430 operand rxmm0() %{ 2431 constraint(ALLOC_IN_RC(xmm0_reg)); 2432 match(VecX); 2433 format%{%} 2434 interface(REG_INTER); 2435 %} 2436 2437 //----------OPERANDS----------------------------------------------------------- 2438 // Operand definitions must precede instruction definitions for correct parsing 2439 // in the ADLC because operands constitute user defined types which are used in 2440 // instruction definitions. 2441 2442 // Vectors 2443 2444 // Dummy generic vector class. Should be used for all vector operands. 2445 // Replaced with vec[SDXYZ] during post-selection pass. 2446 operand vec() %{ 2447 constraint(ALLOC_IN_RC(dynamic)); 2448 match(VecX); 2449 match(VecY); 2450 match(VecZ); 2451 match(VecS); 2452 match(VecD); 2453 2454 format %{ %} 2455 interface(REG_INTER); 2456 %} 2457 2458 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2459 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2460 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2461 // runtime code generation via reg_class_dynamic. 2462 operand legVec() %{ 2463 constraint(ALLOC_IN_RC(dynamic)); 2464 match(VecX); 2465 match(VecY); 2466 match(VecZ); 2467 match(VecS); 2468 match(VecD); 2469 2470 format %{ %} 2471 interface(REG_INTER); 2472 %} 2473 2474 // Replaces vec during post-selection cleanup. See above. 2475 operand vecS() %{ 2476 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2477 match(VecS); 2478 2479 format %{ %} 2480 interface(REG_INTER); 2481 %} 2482 2483 // Replaces legVec during post-selection cleanup. See above. 2484 operand legVecS() %{ 2485 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2486 match(VecS); 2487 2488 format %{ %} 2489 interface(REG_INTER); 2490 %} 2491 2492 // Replaces vec during post-selection cleanup. See above. 
2493 operand vecD() %{ 2494 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2495 match(VecD); 2496 2497 format %{ %} 2498 interface(REG_INTER); 2499 %} 2500 2501 // Replaces legVec during post-selection cleanup. See above. 2502 operand legVecD() %{ 2503 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2504 match(VecD); 2505 2506 format %{ %} 2507 interface(REG_INTER); 2508 %} 2509 2510 // Replaces vec during post-selection cleanup. See above. 2511 operand vecX() %{ 2512 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2513 match(VecX); 2514 2515 format %{ %} 2516 interface(REG_INTER); 2517 %} 2518 2519 // Replaces legVec during post-selection cleanup. See above. 2520 operand legVecX() %{ 2521 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2522 match(VecX); 2523 2524 format %{ %} 2525 interface(REG_INTER); 2526 %} 2527 2528 // Replaces vec during post-selection cleanup. See above. 2529 operand vecY() %{ 2530 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2531 match(VecY); 2532 2533 format %{ %} 2534 interface(REG_INTER); 2535 %} 2536 2537 // Replaces legVec during post-selection cleanup. See above. 2538 operand legVecY() %{ 2539 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2540 match(VecY); 2541 2542 format %{ %} 2543 interface(REG_INTER); 2544 %} 2545 2546 // Replaces vec during post-selection cleanup. See above. 2547 operand vecZ() %{ 2548 constraint(ALLOC_IN_RC(vectorz_reg)); 2549 match(VecZ); 2550 2551 format %{ %} 2552 interface(REG_INTER); 2553 %} 2554 2555 // Replaces legVec during post-selection cleanup. See above. 2556 operand legVecZ() %{ 2557 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2558 match(VecZ); 2559 2560 format %{ %} 2561 interface(REG_INTER); 2562 %} 2563 2564 // Comparison Code for FP conditional move 2565 operand cmpOp_vcmppd() %{ 2566 match(Bool); 2567 2568 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2569 n->as_Bool()->_test._test != BoolTest::no_overflow); 2570 format %{ "" %} 2571 interface(COND_INTER) %{ 2572 equal (0x0, "eq"); 2573 less (0x1, "lt"); 2574 less_equal (0x2, "le"); 2575 not_equal (0xC, "ne"); 2576 greater_equal(0xD, "ge"); 2577 greater (0xE, "gt"); 2578 //TODO cannot compile (adlc breaks) without two next lines with error: 2579 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2580 // equal' for overflow. 
2581 overflow (0x20, "o"); // not really supported by the instruction 2582 no_overflow (0x21, "no"); // not really supported by the instruction 2583 %} 2584 %} 2585 2586 2587 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2588 2589 // ============================================================================ 2590 2591 instruct ShouldNotReachHere() %{ 2592 match(Halt); 2593 format %{ "stop\t# ShouldNotReachHere" %} 2594 ins_encode %{ 2595 if (is_reachable()) { 2596 __ stop(_halt_reason); 2597 } 2598 %} 2599 ins_pipe(pipe_slow); 2600 %} 2601 2602 // =================================EVEX special=============================== 2603 2604 instruct setMask(rRegI dst, rRegI src) %{ 2605 predicate(Matcher::has_predicated_vectors()); 2606 match(Set dst (SetVectMaskI src)); 2607 effect(TEMP dst); 2608 format %{ "setvectmask $dst, $src" %} 2609 ins_encode %{ 2610 __ setvectmask($dst$$Register, $src$$Register); 2611 %} 2612 ins_pipe(pipe_slow); 2613 %} 2614 2615 // ============================================================================ 2616 2617 instruct addF_reg(regF dst, regF src) %{ 2618 predicate((UseSSE>=1) && (UseAVX == 0)); 2619 match(Set dst (AddF dst src)); 2620 2621 format %{ "addss $dst, $src" %} 2622 ins_cost(150); 2623 ins_encode %{ 2624 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2625 %} 2626 ins_pipe(pipe_slow); 2627 %} 2628 2629 instruct addF_mem(regF dst, memory src) %{ 2630 predicate((UseSSE>=1) && (UseAVX == 0)); 2631 match(Set dst (AddF dst (LoadF src))); 2632 2633 format %{ "addss $dst, $src" %} 2634 ins_cost(150); 2635 ins_encode %{ 2636 __ addss($dst$$XMMRegister, $src$$Address); 2637 %} 2638 ins_pipe(pipe_slow); 2639 %} 2640 2641 instruct addF_imm(regF dst, immF con) %{ 2642 predicate((UseSSE>=1) && (UseAVX == 0)); 2643 match(Set dst (AddF dst con)); 2644 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2645 ins_cost(150); 2646 ins_encode %{ 2647 __ addss($dst$$XMMRegister, $constantaddress($con)); 2648 %} 2649 ins_pipe(pipe_slow); 2650 %} 2651 2652 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2653 predicate(UseAVX > 0); 2654 match(Set dst (AddF src1 src2)); 2655 2656 format %{ "vaddss $dst, $src1, $src2" %} 2657 ins_cost(150); 2658 ins_encode %{ 2659 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2660 %} 2661 ins_pipe(pipe_slow); 2662 %} 2663 2664 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2665 predicate(UseAVX > 0); 2666 match(Set dst (AddF src1 (LoadF src2))); 2667 2668 format %{ "vaddss $dst, $src1, $src2" %} 2669 ins_cost(150); 2670 ins_encode %{ 2671 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2672 %} 2673 ins_pipe(pipe_slow); 2674 %} 2675 2676 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2677 predicate(UseAVX > 0); 2678 match(Set dst (AddF src con)); 2679 2680 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2681 ins_cost(150); 2682 ins_encode %{ 2683 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2684 %} 2685 ins_pipe(pipe_slow); 2686 %} 2687 2688 instruct addD_reg(regD dst, regD src) %{ 2689 predicate((UseSSE>=2) && (UseAVX == 0)); 2690 match(Set dst (AddD dst src)); 2691 2692 format %{ "addsd $dst, $src" %} 2693 ins_cost(150); 2694 ins_encode %{ 2695 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2696 %} 2697 ins_pipe(pipe_slow); 2698 %} 2699 2700 instruct addD_mem(regD dst, memory src) %{ 2701 predicate((UseSSE>=2) && (UseAVX == 0)); 2702 
match(Set dst (AddD dst (LoadD src))); 2703 2704 format %{ "addsd $dst, $src" %} 2705 ins_cost(150); 2706 ins_encode %{ 2707 __ addsd($dst$$XMMRegister, $src$$Address); 2708 %} 2709 ins_pipe(pipe_slow); 2710 %} 2711 2712 instruct addD_imm(regD dst, immD con) %{ 2713 predicate((UseSSE>=2) && (UseAVX == 0)); 2714 match(Set dst (AddD dst con)); 2715 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2716 ins_cost(150); 2717 ins_encode %{ 2718 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2719 %} 2720 ins_pipe(pipe_slow); 2721 %} 2722 2723 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2724 predicate(UseAVX > 0); 2725 match(Set dst (AddD src1 src2)); 2726 2727 format %{ "vaddsd $dst, $src1, $src2" %} 2728 ins_cost(150); 2729 ins_encode %{ 2730 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2731 %} 2732 ins_pipe(pipe_slow); 2733 %} 2734 2735 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2736 predicate(UseAVX > 0); 2737 match(Set dst (AddD src1 (LoadD src2))); 2738 2739 format %{ "vaddsd $dst, $src1, $src2" %} 2740 ins_cost(150); 2741 ins_encode %{ 2742 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2743 %} 2744 ins_pipe(pipe_slow); 2745 %} 2746 2747 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2748 predicate(UseAVX > 0); 2749 match(Set dst (AddD src con)); 2750 2751 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2752 ins_cost(150); 2753 ins_encode %{ 2754 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2755 %} 2756 ins_pipe(pipe_slow); 2757 %} 2758 2759 instruct subF_reg(regF dst, regF src) %{ 2760 predicate((UseSSE>=1) && (UseAVX == 0)); 2761 match(Set dst (SubF dst src)); 2762 2763 format %{ "subss $dst, $src" %} 2764 ins_cost(150); 2765 ins_encode %{ 2766 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2767 %} 2768 ins_pipe(pipe_slow); 2769 %} 2770 2771 instruct subF_mem(regF dst, memory src) %{ 2772 predicate((UseSSE>=1) && (UseAVX == 0)); 2773 match(Set dst (SubF dst (LoadF src))); 2774 2775 format %{ "subss $dst, $src" %} 2776 ins_cost(150); 2777 ins_encode %{ 2778 __ subss($dst$$XMMRegister, $src$$Address); 2779 %} 2780 ins_pipe(pipe_slow); 2781 %} 2782 2783 instruct subF_imm(regF dst, immF con) %{ 2784 predicate((UseSSE>=1) && (UseAVX == 0)); 2785 match(Set dst (SubF dst con)); 2786 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2787 ins_cost(150); 2788 ins_encode %{ 2789 __ subss($dst$$XMMRegister, $constantaddress($con)); 2790 %} 2791 ins_pipe(pipe_slow); 2792 %} 2793 2794 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2795 predicate(UseAVX > 0); 2796 match(Set dst (SubF src1 src2)); 2797 2798 format %{ "vsubss $dst, $src1, $src2" %} 2799 ins_cost(150); 2800 ins_encode %{ 2801 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2802 %} 2803 ins_pipe(pipe_slow); 2804 %} 2805 2806 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2807 predicate(UseAVX > 0); 2808 match(Set dst (SubF src1 (LoadF src2))); 2809 2810 format %{ "vsubss $dst, $src1, $src2" %} 2811 ins_cost(150); 2812 ins_encode %{ 2813 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2814 %} 2815 ins_pipe(pipe_slow); 2816 %} 2817 2818 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2819 predicate(UseAVX > 0); 2820 match(Set dst (SubF src con)); 2821 2822 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" 
%} 2823 ins_cost(150); 2824 ins_encode %{ 2825 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2826 %} 2827 ins_pipe(pipe_slow); 2828 %} 2829 2830 instruct subD_reg(regD dst, regD src) %{ 2831 predicate((UseSSE>=2) && (UseAVX == 0)); 2832 match(Set dst (SubD dst src)); 2833 2834 format %{ "subsd $dst, $src" %} 2835 ins_cost(150); 2836 ins_encode %{ 2837 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2838 %} 2839 ins_pipe(pipe_slow); 2840 %} 2841 2842 instruct subD_mem(regD dst, memory src) %{ 2843 predicate((UseSSE>=2) && (UseAVX == 0)); 2844 match(Set dst (SubD dst (LoadD src))); 2845 2846 format %{ "subsd $dst, $src" %} 2847 ins_cost(150); 2848 ins_encode %{ 2849 __ subsd($dst$$XMMRegister, $src$$Address); 2850 %} 2851 ins_pipe(pipe_slow); 2852 %} 2853 2854 instruct subD_imm(regD dst, immD con) %{ 2855 predicate((UseSSE>=2) && (UseAVX == 0)); 2856 match(Set dst (SubD dst con)); 2857 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2858 ins_cost(150); 2859 ins_encode %{ 2860 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2861 %} 2862 ins_pipe(pipe_slow); 2863 %} 2864 2865 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2866 predicate(UseAVX > 0); 2867 match(Set dst (SubD src1 src2)); 2868 2869 format %{ "vsubsd $dst, $src1, $src2" %} 2870 ins_cost(150); 2871 ins_encode %{ 2872 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2873 %} 2874 ins_pipe(pipe_slow); 2875 %} 2876 2877 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2878 predicate(UseAVX > 0); 2879 match(Set dst (SubD src1 (LoadD src2))); 2880 2881 format %{ "vsubsd $dst, $src1, $src2" %} 2882 ins_cost(150); 2883 ins_encode %{ 2884 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2885 %} 2886 ins_pipe(pipe_slow); 2887 %} 2888 2889 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2890 predicate(UseAVX > 0); 2891 match(Set dst (SubD src con)); 2892 2893 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2894 ins_cost(150); 2895 ins_encode %{ 2896 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2897 %} 2898 ins_pipe(pipe_slow); 2899 %} 2900 2901 instruct mulF_reg(regF dst, regF src) %{ 2902 predicate((UseSSE>=1) && (UseAVX == 0)); 2903 match(Set dst (MulF dst src)); 2904 2905 format %{ "mulss $dst, $src" %} 2906 ins_cost(150); 2907 ins_encode %{ 2908 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2909 %} 2910 ins_pipe(pipe_slow); 2911 %} 2912 2913 instruct mulF_mem(regF dst, memory src) %{ 2914 predicate((UseSSE>=1) && (UseAVX == 0)); 2915 match(Set dst (MulF dst (LoadF src))); 2916 2917 format %{ "mulss $dst, $src" %} 2918 ins_cost(150); 2919 ins_encode %{ 2920 __ mulss($dst$$XMMRegister, $src$$Address); 2921 %} 2922 ins_pipe(pipe_slow); 2923 %} 2924 2925 instruct mulF_imm(regF dst, immF con) %{ 2926 predicate((UseSSE>=1) && (UseAVX == 0)); 2927 match(Set dst (MulF dst con)); 2928 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2929 ins_cost(150); 2930 ins_encode %{ 2931 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2932 %} 2933 ins_pipe(pipe_slow); 2934 %} 2935 2936 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2937 predicate(UseAVX > 0); 2938 match(Set dst (MulF src1 src2)); 2939 2940 format %{ "vmulss $dst, $src1, $src2" %} 2941 ins_cost(150); 2942 ins_encode %{ 2943 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2944 %} 2945 ins_pipe(pipe_slow); 2946 %} 
2947 2948 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2949 predicate(UseAVX > 0); 2950 match(Set dst (MulF src1 (LoadF src2))); 2951 2952 format %{ "vmulss $dst, $src1, $src2" %} 2953 ins_cost(150); 2954 ins_encode %{ 2955 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2956 %} 2957 ins_pipe(pipe_slow); 2958 %} 2959 2960 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2961 predicate(UseAVX > 0); 2962 match(Set dst (MulF src con)); 2963 2964 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2965 ins_cost(150); 2966 ins_encode %{ 2967 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2968 %} 2969 ins_pipe(pipe_slow); 2970 %} 2971 2972 instruct mulD_reg(regD dst, regD src) %{ 2973 predicate((UseSSE>=2) && (UseAVX == 0)); 2974 match(Set dst (MulD dst src)); 2975 2976 format %{ "mulsd $dst, $src" %} 2977 ins_cost(150); 2978 ins_encode %{ 2979 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2980 %} 2981 ins_pipe(pipe_slow); 2982 %} 2983 2984 instruct mulD_mem(regD dst, memory src) %{ 2985 predicate((UseSSE>=2) && (UseAVX == 0)); 2986 match(Set dst (MulD dst (LoadD src))); 2987 2988 format %{ "mulsd $dst, $src" %} 2989 ins_cost(150); 2990 ins_encode %{ 2991 __ mulsd($dst$$XMMRegister, $src$$Address); 2992 %} 2993 ins_pipe(pipe_slow); 2994 %} 2995 2996 instruct mulD_imm(regD dst, immD con) %{ 2997 predicate((UseSSE>=2) && (UseAVX == 0)); 2998 match(Set dst (MulD dst con)); 2999 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3000 ins_cost(150); 3001 ins_encode %{ 3002 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3003 %} 3004 ins_pipe(pipe_slow); 3005 %} 3006 3007 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3008 predicate(UseAVX > 0); 3009 match(Set dst (MulD src1 src2)); 3010 3011 format %{ "vmulsd $dst, $src1, $src2" %} 3012 ins_cost(150); 3013 ins_encode %{ 3014 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3015 %} 3016 ins_pipe(pipe_slow); 3017 %} 3018 3019 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3020 predicate(UseAVX > 0); 3021 match(Set dst (MulD src1 (LoadD src2))); 3022 3023 format %{ "vmulsd $dst, $src1, $src2" %} 3024 ins_cost(150); 3025 ins_encode %{ 3026 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3027 %} 3028 ins_pipe(pipe_slow); 3029 %} 3030 3031 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3032 predicate(UseAVX > 0); 3033 match(Set dst (MulD src con)); 3034 3035 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3036 ins_cost(150); 3037 ins_encode %{ 3038 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3039 %} 3040 ins_pipe(pipe_slow); 3041 %} 3042 3043 instruct divF_reg(regF dst, regF src) %{ 3044 predicate((UseSSE>=1) && (UseAVX == 0)); 3045 match(Set dst (DivF dst src)); 3046 3047 format %{ "divss $dst, $src" %} 3048 ins_cost(150); 3049 ins_encode %{ 3050 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3051 %} 3052 ins_pipe(pipe_slow); 3053 %} 3054 3055 instruct divF_mem(regF dst, memory src) %{ 3056 predicate((UseSSE>=1) && (UseAVX == 0)); 3057 match(Set dst (DivF dst (LoadF src))); 3058 3059 format %{ "divss $dst, $src" %} 3060 ins_cost(150); 3061 ins_encode %{ 3062 __ divss($dst$$XMMRegister, $src$$Address); 3063 %} 3064 ins_pipe(pipe_slow); 3065 %} 3066 3067 instruct divF_imm(regF dst, immF con) %{ 3068 predicate((UseSSE>=1) && (UseAVX == 0)); 3069 match(Set dst (DivF dst 
con)); 3070 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3071 ins_cost(150); 3072 ins_encode %{ 3073 __ divss($dst$$XMMRegister, $constantaddress($con)); 3074 %} 3075 ins_pipe(pipe_slow); 3076 %} 3077 3078 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3079 predicate(UseAVX > 0); 3080 match(Set dst (DivF src1 src2)); 3081 3082 format %{ "vdivss $dst, $src1, $src2" %} 3083 ins_cost(150); 3084 ins_encode %{ 3085 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3086 %} 3087 ins_pipe(pipe_slow); 3088 %} 3089 3090 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3091 predicate(UseAVX > 0); 3092 match(Set dst (DivF src1 (LoadF src2))); 3093 3094 format %{ "vdivss $dst, $src1, $src2" %} 3095 ins_cost(150); 3096 ins_encode %{ 3097 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3098 %} 3099 ins_pipe(pipe_slow); 3100 %} 3101 3102 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3103 predicate(UseAVX > 0); 3104 match(Set dst (DivF src con)); 3105 3106 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3107 ins_cost(150); 3108 ins_encode %{ 3109 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3110 %} 3111 ins_pipe(pipe_slow); 3112 %} 3113 3114 instruct divD_reg(regD dst, regD src) %{ 3115 predicate((UseSSE>=2) && (UseAVX == 0)); 3116 match(Set dst (DivD dst src)); 3117 3118 format %{ "divsd $dst, $src" %} 3119 ins_cost(150); 3120 ins_encode %{ 3121 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3122 %} 3123 ins_pipe(pipe_slow); 3124 %} 3125 3126 instruct divD_mem(regD dst, memory src) %{ 3127 predicate((UseSSE>=2) && (UseAVX == 0)); 3128 match(Set dst (DivD dst (LoadD src))); 3129 3130 format %{ "divsd $dst, $src" %} 3131 ins_cost(150); 3132 ins_encode %{ 3133 __ divsd($dst$$XMMRegister, $src$$Address); 3134 %} 3135 ins_pipe(pipe_slow); 3136 %} 3137 3138 instruct divD_imm(regD dst, immD con) %{ 3139 predicate((UseSSE>=2) && (UseAVX == 0)); 3140 match(Set dst (DivD dst con)); 3141 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3142 ins_cost(150); 3143 ins_encode %{ 3144 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3145 %} 3146 ins_pipe(pipe_slow); 3147 %} 3148 3149 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3150 predicate(UseAVX > 0); 3151 match(Set dst (DivD src1 src2)); 3152 3153 format %{ "vdivsd $dst, $src1, $src2" %} 3154 ins_cost(150); 3155 ins_encode %{ 3156 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3157 %} 3158 ins_pipe(pipe_slow); 3159 %} 3160 3161 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3162 predicate(UseAVX > 0); 3163 match(Set dst (DivD src1 (LoadD src2))); 3164 3165 format %{ "vdivsd $dst, $src1, $src2" %} 3166 ins_cost(150); 3167 ins_encode %{ 3168 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3169 %} 3170 ins_pipe(pipe_slow); 3171 %} 3172 3173 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3174 predicate(UseAVX > 0); 3175 match(Set dst (DivD src con)); 3176 3177 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3178 ins_cost(150); 3179 ins_encode %{ 3180 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3181 %} 3182 ins_pipe(pipe_slow); 3183 %} 3184 3185 instruct absF_reg(regF dst) %{ 3186 predicate((UseSSE>=1) && (UseAVX == 0)); 3187 match(Set dst (AbsF dst)); 3188 ins_cost(150); 3189 format %{ "andps $dst, 
[0x7fffffff]\t# abs float by sign masking" %} 3190 ins_encode %{ 3191 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3192 %} 3193 ins_pipe(pipe_slow); 3194 %} 3195 3196 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3197 predicate(UseAVX > 0); 3198 match(Set dst (AbsF src)); 3199 ins_cost(150); 3200 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3201 ins_encode %{ 3202 int vlen_enc = Assembler::AVX_128bit; 3203 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3204 ExternalAddress(float_signmask()), vlen_enc); 3205 %} 3206 ins_pipe(pipe_slow); 3207 %} 3208 3209 instruct absD_reg(regD dst) %{ 3210 predicate((UseSSE>=2) && (UseAVX == 0)); 3211 match(Set dst (AbsD dst)); 3212 ins_cost(150); 3213 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3214 "# abs double by sign masking" %} 3215 ins_encode %{ 3216 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3217 %} 3218 ins_pipe(pipe_slow); 3219 %} 3220 3221 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3222 predicate(UseAVX > 0); 3223 match(Set dst (AbsD src)); 3224 ins_cost(150); 3225 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3226 "# abs double by sign masking" %} 3227 ins_encode %{ 3228 int vlen_enc = Assembler::AVX_128bit; 3229 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3230 ExternalAddress(double_signmask()), vlen_enc); 3231 %} 3232 ins_pipe(pipe_slow); 3233 %} 3234 3235 instruct negF_reg(regF dst) %{ 3236 predicate((UseSSE>=1) && (UseAVX == 0)); 3237 match(Set dst (NegF dst)); 3238 ins_cost(150); 3239 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3240 ins_encode %{ 3241 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3242 %} 3243 ins_pipe(pipe_slow); 3244 %} 3245 3246 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3247 predicate(UseAVX > 0); 3248 match(Set dst (NegF src)); 3249 ins_cost(150); 3250 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3251 ins_encode %{ 3252 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3253 ExternalAddress(float_signflip())); 3254 %} 3255 ins_pipe(pipe_slow); 3256 %} 3257 3258 instruct negD_reg(regD dst) %{ 3259 predicate((UseSSE>=2) && (UseAVX == 0)); 3260 match(Set dst (NegD dst)); 3261 ins_cost(150); 3262 format %{ "xorpd $dst, [0x8000000000000000]\t" 3263 "# neg double by sign flipping" %} 3264 ins_encode %{ 3265 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3266 %} 3267 ins_pipe(pipe_slow); 3268 %} 3269 3270 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3271 predicate(UseAVX > 0); 3272 match(Set dst (NegD src)); 3273 ins_cost(150); 3274 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3275 "# neg double by sign flipping" %} 3276 ins_encode %{ 3277 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3278 ExternalAddress(double_signflip())); 3279 %} 3280 ins_pipe(pipe_slow); 3281 %} 3282 3283 instruct sqrtF_reg(regF dst, regF src) %{ 3284 predicate(UseSSE>=1); 3285 match(Set dst (SqrtF src)); 3286 3287 format %{ "sqrtss $dst, $src" %} 3288 ins_cost(150); 3289 ins_encode %{ 3290 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 3291 %} 3292 ins_pipe(pipe_slow); 3293 %} 3294 3295 instruct sqrtF_mem(regF dst, memory src) %{ 3296 predicate(UseSSE>=1); 3297 match(Set dst (SqrtF (LoadF src))); 3298 3299 format %{ "sqrtss $dst, $src" %} 3300 ins_cost(150); 3301 ins_encode %{ 3302 __ sqrtss($dst$$XMMRegister, $src$$Address); 3303 %} 3304 ins_pipe(pipe_slow); 3305 %} 3306 3307 instruct sqrtF_imm(regF dst, immF con) %{ 3308 
predicate(UseSSE>=1); 3309 match(Set dst (SqrtF con)); 3310 3311 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3312 ins_cost(150); 3313 ins_encode %{ 3314 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 3315 %} 3316 ins_pipe(pipe_slow); 3317 %} 3318 3319 instruct sqrtD_reg(regD dst, regD src) %{ 3320 predicate(UseSSE>=2); 3321 match(Set dst (SqrtD src)); 3322 3323 format %{ "sqrtsd $dst, $src" %} 3324 ins_cost(150); 3325 ins_encode %{ 3326 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 3327 %} 3328 ins_pipe(pipe_slow); 3329 %} 3330 3331 instruct sqrtD_mem(regD dst, memory src) %{ 3332 predicate(UseSSE>=2); 3333 match(Set dst (SqrtD (LoadD src))); 3334 3335 format %{ "sqrtsd $dst, $src" %} 3336 ins_cost(150); 3337 ins_encode %{ 3338 __ sqrtsd($dst$$XMMRegister, $src$$Address); 3339 %} 3340 ins_pipe(pipe_slow); 3341 %} 3342 3343 instruct sqrtD_imm(regD dst, immD con) %{ 3344 predicate(UseSSE>=2); 3345 match(Set dst (SqrtD con)); 3346 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3347 ins_cost(150); 3348 ins_encode %{ 3349 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 3350 %} 3351 ins_pipe(pipe_slow); 3352 %} 3353 3354 // ---------------------------------------- VectorReinterpret ------------------------------------ 3355 3356 instruct reinterpret(vec dst) %{ 3357 predicate(vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1))); // dst == src 3358 match(Set dst (VectorReinterpret dst)); 3359 ins_cost(125); 3360 format %{ "vector_reinterpret $dst\t!" %} 3361 ins_encode %{ 3362 // empty 3363 %} 3364 ins_pipe( pipe_slow ); 3365 %} 3366 3367 instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ 3368 predicate(UseAVX == 0 && 3369 (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst 3370 match(Set dst (VectorReinterpret src)); 3371 ins_cost(125); 3372 effect(TEMP dst, TEMP scratch); 3373 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3374 ins_encode %{ 3375 assert(vector_length_in_bytes(this) <= 16, "required"); 3376 assert(vector_length_in_bytes(this, $src) <= 8, "required"); 3377 3378 int src_vlen_in_bytes = vector_length_in_bytes(this, $src); 3379 if (src_vlen_in_bytes == 4) { 3380 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); 3381 } else { 3382 assert(src_vlen_in_bytes == 8, ""); 3383 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); 3384 } 3385 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3386 %} 3387 ins_pipe( pipe_slow ); 3388 %} 3389 3390 instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ 3391 predicate(UseAVX > 0 && 3392 (vector_length_in_bytes(n->in(1)) == 4) && // src 3393 (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst 3394 match(Set dst (VectorReinterpret src)); 3395 ins_cost(125); 3396 effect(TEMP scratch); 3397 format %{ "vector_reinterpret_expand $dst,$src\t! 
using $scratch as TEMP" %} 3398 ins_encode %{ 3399 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register); 3400 %} 3401 ins_pipe( pipe_slow ); 3402 %} 3403 3404 3405 instruct vreinterpret_expand(legVec dst, vec src) %{ 3406 predicate(UseAVX > 0 && 3407 (vector_length_in_bytes(n->in(1)) > 4) && // src 3408 (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst 3409 match(Set dst (VectorReinterpret src)); 3410 ins_cost(125); 3411 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3412 ins_encode %{ 3413 switch (vector_length_in_bytes(this, $src)) { 3414 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3415 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3416 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3417 default: ShouldNotReachHere(); 3418 } 3419 %} 3420 ins_pipe( pipe_slow ); 3421 %} 3422 3423 instruct reinterpret_shrink(vec dst, legVec src) %{ 3424 predicate(vector_length_in_bytes(n->in(1)) > vector_length_in_bytes(n)); // src > dst 3425 match(Set dst (VectorReinterpret src)); 3426 ins_cost(125); 3427 format %{ "vector_reinterpret_shrink $dst,$src\t!" %} 3428 ins_encode %{ 3429 switch (vector_length_in_bytes(this)) { 3430 case 4: __ movflt ($dst$$XMMRegister, $src$$XMMRegister); break; 3431 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3432 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3433 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3434 default: ShouldNotReachHere(); 3435 } 3436 %} 3437 ins_pipe( pipe_slow ); 3438 %} 3439 3440 // ---------------------------------------------------------------------------------------------------- 3441 3442 #ifdef _LP64 3443 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3444 match(Set dst (RoundDoubleMode src rmode)); 3445 format %{ "roundsd $dst,$src" %} 3446 ins_cost(150); 3447 ins_encode %{ 3448 assert(UseSSE >= 4, "required"); 3449 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3450 %} 3451 ins_pipe(pipe_slow); 3452 %} 3453 3454 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3455 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3456 format %{ "roundsd $dst,$src" %} 3457 ins_cost(150); 3458 ins_encode %{ 3459 assert(UseSSE >= 4, "required"); 3460 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3461 %} 3462 ins_pipe(pipe_slow); 3463 %} 3464 3465 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 3466 match(Set dst (RoundDoubleMode con rmode)); 3467 effect(TEMP scratch_reg); 3468 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3469 ins_cost(150); 3470 ins_encode %{ 3471 assert(UseSSE >= 4, "required"); 3472 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 3473 %} 3474 ins_pipe(pipe_slow); 3475 %} 3476 3477 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3478 predicate(vector_length(n) < 8); 3479 match(Set dst (RoundDoubleModeV src rmode)); 3480 format %{ "vroundpd $dst,$src,$rmode\t! 
round packedD" %} 3481 ins_encode %{ 3482 assert(UseAVX > 0, "required"); 3483 int vlen_enc = vector_length_encoding(this); 3484 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3485 %} 3486 ins_pipe( pipe_slow ); 3487 %} 3488 3489 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3490 predicate(vector_length(n) == 8); 3491 match(Set dst (RoundDoubleModeV src rmode)); 3492 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3493 ins_encode %{ 3494 assert(UseAVX > 2, "required"); 3495 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3496 %} 3497 ins_pipe( pipe_slow ); 3498 %} 3499 3500 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3501 predicate(vector_length(n) < 8); 3502 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3503 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3504 ins_encode %{ 3505 assert(UseAVX > 0, "required"); 3506 int vlen_enc = vector_length_encoding(this); 3507 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3508 %} 3509 ins_pipe( pipe_slow ); 3510 %} 3511 3512 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3513 predicate(vector_length(n) == 8); 3514 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3515 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3516 ins_encode %{ 3517 assert(UseAVX > 2, "required"); 3518 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3519 %} 3520 ins_pipe( pipe_slow ); 3521 %} 3522 #endif // _LP64 3523 3524 instruct onspinwait() %{ 3525 match(OnSpinWait); 3526 ins_cost(200); 3527 3528 format %{ 3529 $$template 3530 $$emit$$"pause\t! membar_onspinwait" 3531 %} 3532 ins_encode %{ 3533 __ pause(); 3534 %} 3535 ins_pipe(pipe_slow); 3536 %} 3537 3538 // a * b + c 3539 instruct fmaD_reg(regD a, regD b, regD c) %{ 3540 predicate(UseFMA); 3541 match(Set c (FmaD c (Binary a b))); 3542 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3543 ins_cost(150); 3544 ins_encode %{ 3545 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3546 %} 3547 ins_pipe( pipe_slow ); 3548 %} 3549 3550 // a * b + c 3551 instruct fmaF_reg(regF a, regF b, regF c) %{ 3552 predicate(UseFMA); 3553 match(Set c (FmaF c (Binary a b))); 3554 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3555 ins_cost(150); 3556 ins_encode %{ 3557 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3558 %} 3559 ins_pipe( pipe_slow ); 3560 %} 3561 3562 // ====================VECTOR INSTRUCTIONS===================================== 3563 3564 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 
3565 instruct MoveVec2Leg(legVec dst, vec src) %{ 3566 match(Set dst src); 3567 format %{ "" %} 3568 ins_encode %{ 3569 ShouldNotReachHere(); 3570 %} 3571 ins_pipe( fpu_reg_reg ); 3572 %} 3573 3574 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3575 match(Set dst src); 3576 format %{ "" %} 3577 ins_encode %{ 3578 ShouldNotReachHere(); 3579 %} 3580 ins_pipe( fpu_reg_reg ); 3581 %} 3582 3583 // ============================================================================ 3584 3585 // Load vectors generic operand pattern 3586 instruct loadV(vec dst, memory mem) %{ 3587 match(Set dst (LoadVector mem)); 3588 ins_cost(125); 3589 format %{ "load_vector $dst,$mem" %} 3590 ins_encode %{ 3591 switch (vector_length_in_bytes(this)) { 3592 case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break; 3593 case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break; 3594 case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break; 3595 case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break; 3596 case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break; 3597 default: ShouldNotReachHere(); 3598 } 3599 %} 3600 ins_pipe( pipe_slow ); 3601 %} 3602 3603 // Store vectors generic operand pattern. 3604 instruct storeV(memory mem, vec src) %{ 3605 match(Set mem (StoreVector mem src)); 3606 ins_cost(145); 3607 format %{ "store_vector $mem,$src\n\t" %} 3608 ins_encode %{ 3609 switch (vector_length_in_bytes(this, $src)) { 3610 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3611 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3612 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3613 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3614 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3615 default: ShouldNotReachHere(); 3616 } 3617 %} 3618 ins_pipe( pipe_slow ); 3619 %} 3620 3621 // ---------------------------------------- Gather ------------------------------------ 3622 3623 // Gather INT, LONG, FLOAT, DOUBLE 3624 3625 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 3626 predicate(vector_length_in_bytes(n) <= 32); 3627 match(Set dst (LoadVectorGather mem idx)); 3628 effect(TEMP dst, TEMP tmp, TEMP mask); 3629 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} 3630 ins_encode %{ 3631 assert(UseAVX >= 2, "sanity"); 3632 3633 int vlen_enc = vector_length_encoding(this); 3634 BasicType elem_bt = vector_element_basic_type(this); 3635 3636 assert(vector_length_in_bytes(this) >= 16, "sanity"); 3637 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3638 3639 if (vlen_enc == Assembler::AVX_128bit) { 3640 __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3641 } else { 3642 __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3643 } 3644 __ lea($tmp$$Register, $mem$$Address); 3645 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3646 %} 3647 ins_pipe( pipe_slow ); 3648 %} 3649 3650 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{ 3651 predicate(vector_length_in_bytes(n) == 64); 3652 match(Set dst (LoadVectorGather mem idx)); 3653 effect(TEMP dst, TEMP tmp); 3654 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and k2 as TEMP" %} 3655 ins_encode %{ 3656 assert(UseAVX > 2, "sanity"); 3657 3658 int vlen_enc = vector_length_encoding(this); 3659 BasicType elem_bt = vector_element_basic_type(this); 3660 3661 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3662 3663 KRegister ktmp = k2; 3664 __ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 3665 __ lea($tmp$$Register, $mem$$Address); 3666 __ evgather(elem_bt, $dst$$XMMRegister, ktmp, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 3667 %} 3668 ins_pipe( pipe_slow ); 3669 %} 3670 3671 // ====================Scatter======================================= 3672 3673 // Scatter INT, LONG, FLOAT, DOUBLE 3674 3675 instruct scatter(memory mem, vec src, vec idx, rRegP tmp) %{ 3676 match(Set mem (StoreVectorScatter mem (Binary src idx))); 3677 effect(TEMP tmp); 3678 format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %} 3679 ins_encode %{ 3680 assert(UseAVX > 2, "sanity"); 3681 3682 int vlen_enc = vector_length_encoding(this, $src); 3683 BasicType elem_bt = vector_element_basic_type(this, $src); 3684 3685 assert(vector_length_in_bytes(this, $src) >= 16, "sanity"); 3686 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3687 3688 KRegister ktmp = k2; 3689 __ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 3690 __ lea($tmp$$Register, $mem$$Address); 3691 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, ktmp, $src$$XMMRegister, vlen_enc); 3692 %} 3693 ins_pipe( pipe_slow ); 3694 %} 3695 3696 // ====================REPLICATE======================================= 3697 3698 // Replicate byte scalar to be vector 3699 instruct ReplB_reg(vec dst, rRegI src) %{ 3700 match(Set dst (ReplicateB src)); 3701 format %{ "replicateB $dst,$src" %} 3702 ins_encode %{ 3703 uint vlen = vector_length(this); 3704 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3705 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 3706 int vlen_enc = vector_length_encoding(this); 3707 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 3708 } else { 3709 __ movdl($dst$$XMMRegister, $src$$Register); 3710 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3711 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3712 if (vlen >= 16) { 3713 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3714 if (vlen >= 32) { 3715 assert(vlen == 32, "sanity"); 3716 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3717 } 3718 } 3719 } 3720 %} 3721 ins_pipe( pipe_slow ); 3722 %} 3723 3724 instruct ReplB_mem(vec dst, memory mem) %{ 3725 predicate(VM_Version::supports_avx2()); 3726 match(Set dst (ReplicateB (LoadB mem))); 3727 format %{ "replicateB $dst,$mem" %} 3728 ins_encode %{ 3729 int vlen_enc = vector_length_encoding(this); 3730 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 3731 %} 3732 ins_pipe( pipe_slow ); 3733 %} 3734 3735 instruct ReplB_imm(vec dst, immI con) %{ 3736 match(Set dst (ReplicateB con)); 3737 format %{ "replicateB $dst,$con" %} 3738 ins_encode %{ 3739 uint vlen = vector_length(this); 3740 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1)); 3741 if (vlen == 4) { 3742 __ movdl($dst$$XMMRegister, const_addr); 3743 } else { 3744 __ movq($dst$$XMMRegister, const_addr); 3745 if (vlen >= 16) { 3746 if (VM_Version::supports_avx2()) { 3747 int vlen_enc = vector_length_encoding(this); 3748 __ 
vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3749 } else { 3750 assert(vlen == 16, "sanity"); 3751 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3752 } 3753 } 3754 } 3755 %} 3756 ins_pipe( pipe_slow ); 3757 %} 3758 3759 // Replicate byte scalar zero to be vector 3760 instruct ReplB_zero(vec dst, immI_0 zero) %{ 3761 match(Set dst (ReplicateB zero)); 3762 format %{ "replicateB $dst,$zero" %} 3763 ins_encode %{ 3764 uint vlen = vector_length(this); 3765 if (vlen <= 16) { 3766 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3767 } else { 3768 // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ). 3769 int vlen_enc = vector_length_encoding(this); 3770 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3771 } 3772 %} 3773 ins_pipe( fpu_reg_reg ); 3774 %} 3775 3776 // ====================ReplicateS======================================= 3777 3778 instruct ReplS_reg(vec dst, rRegI src) %{ 3779 match(Set dst (ReplicateS src)); 3780 format %{ "replicateS $dst,$src" %} 3781 ins_encode %{ 3782 uint vlen = vector_length(this); 3783 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3784 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 3785 int vlen_enc = vector_length_encoding(this); 3786 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 3787 } else { 3788 __ movdl($dst$$XMMRegister, $src$$Register); 3789 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3790 if (vlen >= 8) { 3791 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3792 if (vlen >= 16) { 3793 assert(vlen == 16, "sanity"); 3794 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3795 } 3796 } 3797 } 3798 %} 3799 ins_pipe( pipe_slow ); 3800 %} 3801 3802 instruct ReplS_mem(vec dst, memory mem) %{ 3803 predicate(VM_Version::supports_avx2()); 3804 match(Set dst (ReplicateS (LoadS mem))); 3805 format %{ "replicateS $dst,$mem" %} 3806 ins_encode %{ 3807 int vlen_enc = vector_length_encoding(this); 3808 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 3809 %} 3810 ins_pipe( pipe_slow ); 3811 %} 3812 3813 instruct ReplS_imm(vec dst, immI con) %{ 3814 match(Set dst (ReplicateS con)); 3815 format %{ "replicateS $dst,$con" %} 3816 ins_encode %{ 3817 uint vlen = vector_length(this); 3818 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2)); 3819 if (vlen == 2) { 3820 __ movdl($dst$$XMMRegister, const_addr); 3821 } else { 3822 __ movq($dst$$XMMRegister, const_addr); 3823 if (vlen >= 8) { 3824 if (VM_Version::supports_avx2()) { 3825 int vlen_enc = vector_length_encoding(this); 3826 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3827 } else { 3828 assert(vlen == 8, "sanity"); 3829 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3830 } 3831 } 3832 } 3833 %} 3834 ins_pipe( fpu_reg_reg ); 3835 %} 3836 3837 instruct ReplS_zero(vec dst, immI_0 zero) %{ 3838 match(Set dst (ReplicateS zero)); 3839 format %{ "replicateS $dst,$zero" %} 3840 ins_encode %{ 3841 uint vlen = vector_length(this); 3842 if (vlen <= 8) { 3843 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3844 } else { 3845 int vlen_enc = vector_length_encoding(this); 3846 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3847 } 3848 %} 3849 ins_pipe( fpu_reg_reg ); 3850 %} 3851 3852 // ====================ReplicateI======================================= 3853 3854 instruct ReplI_reg(vec dst, rRegI src) %{ 3855 match(Set dst (ReplicateI 
src)); 3856 format %{ "replicateI $dst,$src" %} 3857 ins_encode %{ 3858 uint vlen = vector_length(this); 3859 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3860 int vlen_enc = vector_length_encoding(this); 3861 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 3862 } else { 3863 __ movdl($dst$$XMMRegister, $src$$Register); 3864 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3865 if (vlen >= 8) { 3866 assert(vlen == 8, "sanity"); 3867 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3868 } 3869 } 3870 %} 3871 ins_pipe( pipe_slow ); 3872 %} 3873 3874 instruct ReplI_mem(vec dst, memory mem) %{ 3875 match(Set dst (ReplicateI (LoadI mem))); 3876 format %{ "replicateI $dst,$mem" %} 3877 ins_encode %{ 3878 uint vlen = vector_length(this); 3879 if (vlen <= 4) { 3880 __ movdl($dst$$XMMRegister, $mem$$Address); 3881 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3882 } else { 3883 assert(VM_Version::supports_avx2(), "sanity"); 3884 int vlen_enc = vector_length_encoding(this); 3885 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 3886 } 3887 %} 3888 ins_pipe( pipe_slow ); 3889 %} 3890 3891 instruct ReplI_imm(vec dst, immI con) %{ 3892 match(Set dst (ReplicateI con)); 3893 format %{ "replicateI $dst,$con" %} 3894 ins_encode %{ 3895 uint vlen = vector_length(this); 3896 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4)); 3897 if (vlen <= 4) { 3898 __ movq($dst$$XMMRegister, const_addr); 3899 if (vlen == 4) { 3900 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3901 } 3902 } else { 3903 assert(VM_Version::supports_avx2(), "sanity"); 3904 int vlen_enc = vector_length_encoding(this); 3905 __ movq($dst$$XMMRegister, const_addr); 3906 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3907 } 3908 %} 3909 ins_pipe( pipe_slow ); 3910 %} 3911 3912 // Replicate integer (4 byte) scalar zero to be vector 3913 instruct ReplI_zero(vec dst, immI_0 zero) %{ 3914 match(Set dst (ReplicateI zero)); 3915 format %{ "replicateI $dst,$zero" %} 3916 ins_encode %{ 3917 uint vlen = vector_length(this); 3918 if (vlen <= 4) { 3919 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3920 } else { 3921 int vlen_enc = vector_length_encoding(this); 3922 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3923 } 3924 %} 3925 ins_pipe( fpu_reg_reg ); 3926 %} 3927 3928 instruct ReplI_M1(vec dst, immI_M1 con) %{ 3929 predicate(UseAVX > 0); 3930 match(Set dst (ReplicateB con)); 3931 match(Set dst (ReplicateS con)); 3932 match(Set dst (ReplicateI con)); 3933 effect(TEMP dst); 3934 format %{ "vallones $dst" %} 3935 ins_encode %{ 3936 int vector_len = vector_length_encoding(this); 3937 __ vallones($dst$$XMMRegister, vector_len); 3938 %} 3939 ins_pipe( pipe_slow ); 3940 %} 3941 3942 // ====================ReplicateL======================================= 3943 3944 #ifdef _LP64 3945 // Replicate long (8 byte) scalar to be vector 3946 instruct ReplL_reg(vec dst, rRegL src) %{ 3947 match(Set dst (ReplicateL src)); 3948 format %{ "replicateL $dst,$src" %} 3949 ins_encode %{ 3950 uint vlen = vector_length(this); 3951 if (vlen == 2) { 3952 __ movdq($dst$$XMMRegister, $src$$Register); 3953 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3954 } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3955 int vlen_enc = vector_length_encoding(this); 3956 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 3957 } else { 3958 assert(vlen == 4, 
"sanity"); 3959 __ movdq($dst$$XMMRegister, $src$$Register); 3960 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3961 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3962 } 3963 %} 3964 ins_pipe( pipe_slow ); 3965 %} 3966 #else // _LP64 3967 // Replicate long (8 byte) scalar to be vector 3968 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 3969 predicate(vector_length(n) <= 4); 3970 match(Set dst (ReplicateL src)); 3971 effect(TEMP dst, USE src, TEMP tmp); 3972 format %{ "replicateL $dst,$src" %} 3973 ins_encode %{ 3974 uint vlen = vector_length(this); 3975 if (vlen == 2) { 3976 __ movdl($dst$$XMMRegister, $src$$Register); 3977 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3978 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3979 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3980 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3981 int vlen_enc = Assembler::AVX_256bit; 3982 __ movdl($dst$$XMMRegister, $src$$Register); 3983 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3984 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3985 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3986 } else { 3987 __ movdl($dst$$XMMRegister, $src$$Register); 3988 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3989 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3990 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3991 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3992 } 3993 %} 3994 ins_pipe( pipe_slow ); 3995 %} 3996 3997 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 3998 predicate(vector_length(n) == 8); 3999 match(Set dst (ReplicateL src)); 4000 effect(TEMP dst, USE src, TEMP tmp); 4001 format %{ "replicateL $dst,$src" %} 4002 ins_encode %{ 4003 if (VM_Version::supports_avx512vl()) { 4004 __ movdl($dst$$XMMRegister, $src$$Register); 4005 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4006 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4007 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4008 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4009 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4010 } else { 4011 int vlen_enc = Assembler::AVX_512bit; 4012 __ movdl($dst$$XMMRegister, $src$$Register); 4013 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4014 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4015 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4016 } 4017 %} 4018 ins_pipe( pipe_slow ); 4019 %} 4020 #endif // _LP64 4021 4022 instruct ReplL_mem(vec dst, memory mem) %{ 4023 match(Set dst (ReplicateL (LoadL mem))); 4024 format %{ "replicateL $dst,$mem" %} 4025 ins_encode %{ 4026 uint vlen = vector_length(this); 4027 if (vlen == 2) { 4028 __ movq($dst$$XMMRegister, $mem$$Address); 4029 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4030 } else { 4031 assert(VM_Version::supports_avx2(), "sanity"); 4032 int vlen_enc = vector_length_encoding(this); 4033 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4034 } 4035 %} 4036 ins_pipe( pipe_slow ); 4037 %} 4038 4039 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
4040 instruct ReplL_imm(vec dst, immL con) %{ 4041 match(Set dst (ReplicateL con)); 4042 format %{ "replicateL $dst,$con" %} 4043 ins_encode %{ 4044 uint vlen = vector_length(this); 4045 InternalAddress const_addr = $constantaddress($con); 4046 if (vlen == 2) { 4047 __ movq($dst$$XMMRegister, const_addr); 4048 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4049 } else { 4050 assert(VM_Version::supports_avx2(), "sanity"); 4051 int vlen_enc = vector_length_encoding(this); 4052 __ movq($dst$$XMMRegister, const_addr); 4053 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4054 } 4055 %} 4056 ins_pipe( pipe_slow ); 4057 %} 4058 4059 instruct ReplL_zero(vec dst, immL0 zero) %{ 4060 match(Set dst (ReplicateL zero)); 4061 format %{ "replicateL $dst,$zero" %} 4062 ins_encode %{ 4063 int vlen = vector_length(this); 4064 if (vlen == 2) { 4065 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4066 } else { 4067 int vlen_enc = vector_length_encoding(this); 4068 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4069 } 4070 %} 4071 ins_pipe( fpu_reg_reg ); 4072 %} 4073 4074 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4075 predicate(UseAVX > 0); 4076 match(Set dst (ReplicateL con)); 4077 effect(TEMP dst); 4078 format %{ "vallones $dst" %} 4079 ins_encode %{ 4080 int vector_len = vector_length_encoding(this); 4081 __ vallones($dst$$XMMRegister, vector_len); 4082 %} 4083 ins_pipe( pipe_slow ); 4084 %} 4085 4086 // ====================ReplicateF======================================= 4087 4088 instruct ReplF_reg(vec dst, vlRegF src) %{ 4089 match(Set dst (ReplicateF src)); 4090 format %{ "replicateF $dst,$src" %} 4091 ins_encode %{ 4092 uint vlen = vector_length(this); 4093 if (vlen <= 4) { 4094 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4095 } else if (VM_Version::supports_avx2()) { 4096 int vlen_enc = vector_length_encoding(this); 4097 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4098 } else { 4099 assert(vlen == 8, "sanity"); 4100 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4101 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4102 } 4103 %} 4104 ins_pipe( pipe_slow ); 4105 %} 4106 4107 instruct ReplF_mem(vec dst, memory mem) %{ 4108 match(Set dst (ReplicateF (LoadF mem))); 4109 format %{ "replicateF $dst,$mem" %} 4110 ins_encode %{ 4111 uint vlen = vector_length(this); 4112 if (vlen <= 4) { 4113 __ movdl($dst$$XMMRegister, $mem$$Address); 4114 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4115 } else { 4116 assert(VM_Version::supports_avx(), "sanity"); 4117 int vlen_enc = vector_length_encoding(this); 4118 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4119 } 4120 %} 4121 ins_pipe( pipe_slow ); 4122 %} 4123 4124 instruct ReplF_zero(vec dst, immF0 zero) %{ 4125 match(Set dst (ReplicateF zero)); 4126 format %{ "replicateF $dst,$zero" %} 4127 ins_encode %{ 4128 uint vlen = vector_length(this); 4129 if (vlen <= 4) { 4130 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4131 } else { 4132 int vlen_enc = vector_length_encoding(this); 4133 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4134 } 4135 %} 4136 ins_pipe( fpu_reg_reg ); 4137 %} 4138 4139 // ====================ReplicateD======================================= 4140 4141 // Replicate double (8 bytes) scalar to be vector 4142 instruct ReplD_reg(vec dst, vlRegD src) %{ 4143 match(Set dst (ReplicateD src)); 4144 format %{ "replicateD 
$dst,$src" %} 4145 ins_encode %{ 4146 uint vlen = vector_length(this); 4147 if (vlen == 2) { 4148 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4149 } else if (VM_Version::supports_avx2()) { 4150 int vlen_enc = vector_length_encoding(this); 4151 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4152 } else { 4153 assert(vlen == 4, "sanity"); 4154 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4155 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4156 } 4157 %} 4158 ins_pipe( pipe_slow ); 4159 %} 4160 4161 instruct ReplD_mem(vec dst, memory mem) %{ 4162 match(Set dst (ReplicateD (LoadD mem))); 4163 format %{ "replicateD $dst,$mem" %} 4164 ins_encode %{ 4165 uint vlen = vector_length(this); 4166 if (vlen == 2) { 4167 __ movq($dst$$XMMRegister, $mem$$Address); 4168 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); 4169 } else { 4170 assert(VM_Version::supports_avx(), "sanity"); 4171 int vlen_enc = vector_length_encoding(this); 4172 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4173 } 4174 %} 4175 ins_pipe( pipe_slow ); 4176 %} 4177 4178 instruct ReplD_zero(vec dst, immD0 zero) %{ 4179 match(Set dst (ReplicateD zero)); 4180 format %{ "replicateD $dst,$zero" %} 4181 ins_encode %{ 4182 uint vlen = vector_length(this); 4183 if (vlen == 2) { 4184 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 4185 } else { 4186 int vlen_enc = vector_length_encoding(this); 4187 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4188 } 4189 %} 4190 ins_pipe( fpu_reg_reg ); 4191 %} 4192 4193 // ====================VECTOR INSERT======================================= 4194 4195 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4196 predicate(vector_length_in_bytes(n) < 32); 4197 match(Set dst (VectorInsert (Binary dst val) idx)); 4198 format %{ "vector_insert $dst,$val,$idx" %} 4199 ins_encode %{ 4200 assert(UseSSE >= 4, "required"); 4201 assert(vector_length_in_bytes(this) >= 8, "required"); 4202 4203 BasicType elem_bt = vector_element_basic_type(this); 4204 4205 assert(is_integral_type(elem_bt), ""); 4206 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4207 4208 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4209 %} 4210 ins_pipe( pipe_slow ); 4211 %} 4212 4213 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4214 predicate(vector_length_in_bytes(n) == 32); 4215 match(Set dst (VectorInsert (Binary src val) idx)); 4216 effect(TEMP vtmp); 4217 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4218 ins_encode %{ 4219 int vlen_enc = Assembler::AVX_256bit; 4220 BasicType elem_bt = vector_element_basic_type(this); 4221 int elem_per_lane = 16/type2aelembytes(elem_bt); 4222 int log2epr = log2(elem_per_lane); 4223 4224 assert(is_integral_type(elem_bt), "sanity"); 4225 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4226 4227 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4228 uint y_idx = ($idx$$constant >> log2epr) & 1; 4229 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4230 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4231 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4232 %} 4233 ins_pipe( pipe_slow ); 4234 %} 4235 4236 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4237 predicate(vector_length_in_bytes(n) == 64); 4238 match(Set dst 
(VectorInsert (Binary src val) idx)); 4239 effect(TEMP vtmp); 4240 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4241 ins_encode %{ 4242 assert(UseAVX > 2, "sanity"); 4243 4244 BasicType elem_bt = vector_element_basic_type(this); 4245 int elem_per_lane = 16/type2aelembytes(elem_bt); 4246 int log2epr = log2(elem_per_lane); 4247 4248 assert(is_integral_type(elem_bt), ""); 4249 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4250 4251 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4252 uint y_idx = ($idx$$constant >> log2epr) & 3; 4253 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4254 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4255 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4256 %} 4257 ins_pipe( pipe_slow ); 4258 %} 4259 4260 #ifdef _LP64 4261 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4262 predicate(vector_length(n) == 2); 4263 match(Set dst (VectorInsert (Binary dst val) idx)); 4264 format %{ "vector_insert $dst,$val,$idx" %} 4265 ins_encode %{ 4266 assert(UseSSE >= 4, "required"); 4267 assert(vector_element_basic_type(this) == T_LONG, ""); 4268 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4269 4270 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4271 %} 4272 ins_pipe( pipe_slow ); 4273 %} 4274 4275 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4276 predicate(vector_length(n) == 4); 4277 match(Set dst (VectorInsert (Binary src val) idx)); 4278 effect(TEMP vtmp); 4279 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4280 ins_encode %{ 4281 assert(vector_element_basic_type(this) == T_LONG, ""); 4282 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4283 4284 uint x_idx = $idx$$constant & right_n_bits(1); 4285 uint y_idx = ($idx$$constant >> 1) & 1; 4286 int vlen_enc = Assembler::AVX_256bit; 4287 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4288 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4289 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4290 %} 4291 ins_pipe( pipe_slow ); 4292 %} 4293 4294 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4295 predicate(vector_length(n) == 8); 4296 match(Set dst (VectorInsert (Binary src val) idx)); 4297 effect(TEMP vtmp); 4298 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4299 ins_encode %{ 4300 assert(vector_element_basic_type(this) == T_LONG, "sanity"); 4301 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4302 4303 uint x_idx = $idx$$constant & right_n_bits(1); 4304 uint y_idx = ($idx$$constant >> 1) & 3; 4305 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4306 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4307 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4308 %} 4309 ins_pipe( pipe_slow ); 4310 %} 4311 #endif 4312 4313 instruct insertF(vec dst, regF val, immU8 idx) %{ 4314 predicate(vector_length(n) < 8); 4315 match(Set dst (VectorInsert (Binary dst val) idx)); 4316 format %{ "vector_insert $dst,$val,$idx" %} 4317 ins_encode %{ 4318 assert(UseSSE >= 4, "sanity"); 4319 4320 assert(vector_element_basic_type(this) == T_FLOAT, "sanity"); 4321 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4322 4323 __ insertps($dst$$XMMRegister, $val$$XMMRegister, 
$idx$$constant); 4324 %} 4325 ins_pipe( pipe_slow ); 4326 %} 4327 4328 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4329 predicate(vector_length(n) >= 8); 4330 match(Set dst (VectorInsert (Binary src val) idx)); 4331 effect(TEMP vtmp); 4332 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4333 ins_encode %{ 4334 assert(vector_element_basic_type(this) == T_FLOAT, "sanity"); 4335 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4336 4337 int vlen = vector_length(this); 4338 uint x_idx = $idx$$constant & right_n_bits(2); 4339 if (vlen == 8) { 4340 uint y_idx = ($idx$$constant >> 2) & 1; 4341 int vlen_enc = Assembler::AVX_256bit; 4342 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4343 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx); 4344 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4345 } else { 4346 assert(vlen == 16, "sanity"); 4347 uint y_idx = ($idx$$constant >> 2) & 3; 4348 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4349 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx); 4350 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4351 } 4352 %} 4353 ins_pipe( pipe_slow ); 4354 %} 4355 4356 #ifdef _LP64 4357 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4358 predicate(vector_length(n) == 2); 4359 match(Set dst (VectorInsert (Binary dst val) idx)); 4360 effect(TEMP tmp); 4361 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4362 ins_encode %{ 4363 assert(UseSSE >= 4, "sanity"); 4364 assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4365 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4366 4367 __ movq($tmp$$Register, $val$$XMMRegister); 4368 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4369 %} 4370 ins_pipe( pipe_slow ); 4371 %} 4372 4373 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4374 predicate(vector_length(n) == 4); 4375 match(Set dst (VectorInsert (Binary src val) idx)); 4376 effect(TEMP vtmp, TEMP tmp); 4377 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4378 ins_encode %{ 4379 assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4380 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4381 4382 uint x_idx = $idx$$constant & right_n_bits(1); 4383 uint y_idx = ($idx$$constant >> 1) & 1; 4384 int vlen_enc = Assembler::AVX_256bit; 4385 __ movq($tmp$$Register, $val$$XMMRegister); 4386 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4387 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4388 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4389 %} 4390 ins_pipe( pipe_slow ); 4391 %} 4392 4393 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4394 predicate(vector_length(n) == 8); 4395 match(Set dst (VectorInsert (Binary src val) idx)); 4396 effect(TEMP tmp, TEMP vtmp); 4397 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4398 ins_encode %{ 4399 assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4400 assert($idx$$constant < (int)vector_length(this), "out of bounds"); 4401 4402 uint x_idx = $idx$$constant & right_n_bits(1); 4403 uint y_idx = ($idx$$constant >> 1) & 3; 4404 __ movq($tmp$$Register, $val$$XMMRegister); 4405 __ vextracti32x4($vtmp$$XMMRegister, 
$src$$XMMRegister, y_idx); 4406 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4407 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4408 %} 4409 ins_pipe( pipe_slow ); 4410 %} 4411 #endif 4412 4413 // ====================REDUCTION ARITHMETIC======================================= 4414 4415 // =======================Int Reduction========================================== 4416 4417 instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4418 predicate(vector_element_basic_type(n->in(2)) == T_INT && 4419 vector_length(n->in(2)) < 16); // src2 4420 match(Set dst (AddReductionVI src1 src2)); 4421 match(Set dst (MulReductionVI src1 src2)); 4422 match(Set dst (AndReductionV src1 src2)); 4423 match(Set dst ( OrReductionV src1 src2)); 4424 match(Set dst (XorReductionV src1 src2)); 4425 match(Set dst (MinReductionV src1 src2)); 4426 match(Set dst (MaxReductionV src1 src2)); 4427 effect(TEMP vtmp1, TEMP vtmp2); 4428 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4429 ins_encode %{ 4430 int opcode = this->ideal_Opcode(); 4431 int vlen = vector_length(this, $src2); 4432 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4433 %} 4434 ins_pipe( pipe_slow ); 4435 %} 4436 4437 instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4438 predicate(vector_element_basic_type(n->in(2)) == T_INT && 4439 vector_length(n->in(2)) == 16); // src2 4440 match(Set dst (AddReductionVI src1 src2)); 4441 match(Set dst (MulReductionVI src1 src2)); 4442 match(Set dst (AndReductionV src1 src2)); 4443 match(Set dst ( OrReductionV src1 src2)); 4444 match(Set dst (XorReductionV src1 src2)); 4445 match(Set dst (MinReductionV src1 src2)); 4446 match(Set dst (MaxReductionV src1 src2)); 4447 effect(TEMP vtmp1, TEMP vtmp2); 4448 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4449 ins_encode %{ 4450 int opcode = this->ideal_Opcode(); 4451 int vlen = vector_length(this, $src2); 4452 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4453 %} 4454 ins_pipe( pipe_slow ); 4455 %} 4456 4457 // =======================Long Reduction========================================== 4458 4459 #ifdef _LP64 4460 instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4461 predicate(vector_element_basic_type(n->in(2)) == T_LONG && 4462 vector_length(n->in(2)) < 8); // src2 4463 match(Set dst (AddReductionVL src1 src2)); 4464 match(Set dst (MulReductionVL src1 src2)); 4465 match(Set dst (AndReductionV src1 src2)); 4466 match(Set dst ( OrReductionV src1 src2)); 4467 match(Set dst (XorReductionV src1 src2)); 4468 match(Set dst (MinReductionV src1 src2)); 4469 match(Set dst (MaxReductionV src1 src2)); 4470 effect(TEMP vtmp1, TEMP vtmp2); 4471 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4472 ins_encode %{ 4473 int opcode = this->ideal_Opcode(); 4474 int vlen = vector_length(this, $src2); 4475 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4476 %} 4477 ins_pipe( pipe_slow ); 4478 %} 4479 4480 instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4481 predicate(vector_element_basic_type(n->in(2)) == T_LONG && 4482 vector_length(n->in(2)) == 8); // src2 4483 match(Set 
dst (AddReductionVL src1 src2)); 4484 match(Set dst (MulReductionVL src1 src2)); 4485 match(Set dst (AndReductionV src1 src2)); 4486 match(Set dst ( OrReductionV src1 src2)); 4487 match(Set dst (XorReductionV src1 src2)); 4488 match(Set dst (MinReductionV src1 src2)); 4489 match(Set dst (MaxReductionV src1 src2)); 4490 effect(TEMP vtmp1, TEMP vtmp2); 4491 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4492 ins_encode %{ 4493 int opcode = this->ideal_Opcode(); 4494 int vlen = vector_length(this, $src2); 4495 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4496 %} 4497 ins_pipe( pipe_slow ); 4498 %} 4499 #endif // _LP64 4500 4501 // =======================Float Reduction========================================== 4502 4503 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 4504 predicate(vector_length(n->in(2)) <= 4); // src 4505 match(Set dst (AddReductionVF dst src)); 4506 match(Set dst (MulReductionVF dst src)); 4507 effect(TEMP dst, TEMP vtmp); 4508 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 4509 ins_encode %{ 4510 int opcode = this->ideal_Opcode(); 4511 int vlen = vector_length(this, $src); 4512 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4513 %} 4514 ins_pipe( pipe_slow ); 4515 %} 4516 4517 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4518 predicate(vector_length(n->in(2)) == 8); // src 4519 match(Set dst (AddReductionVF dst src)); 4520 match(Set dst (MulReductionVF dst src)); 4521 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4522 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4523 ins_encode %{ 4524 int opcode = this->ideal_Opcode(); 4525 int vlen = vector_length(this, $src); 4526 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4527 %} 4528 ins_pipe( pipe_slow ); 4529 %} 4530 4531 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4532 predicate(vector_length(n->in(2)) == 16); // src 4533 match(Set dst (AddReductionVF dst src)); 4534 match(Set dst (MulReductionVF dst src)); 4535 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4536 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4537 ins_encode %{ 4538 int opcode = this->ideal_Opcode(); 4539 int vlen = vector_length(this, $src); 4540 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4541 %} 4542 ins_pipe( pipe_slow ); 4543 %} 4544 4545 // =======================Double Reduction========================================== 4546 4547 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 4548 predicate(vector_length(n->in(2)) == 2); // src 4549 match(Set dst (AddReductionVD dst src)); 4550 match(Set dst (MulReductionVD dst src)); 4551 effect(TEMP dst, TEMP vtmp); 4552 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 4553 ins_encode %{ 4554 int opcode = this->ideal_Opcode(); 4555 int vlen = vector_length(this, $src); 4556 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4557 %} 4558 ins_pipe( pipe_slow ); 4559 %} 4560 4561 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 4562 predicate(vector_length(n->in(2)) == 4); // src 4563 match(Set dst (AddReductionVD dst src)); 4564 match(Set dst (MulReductionVD dst src)); 4565 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4566 format %{ 
"vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4567 ins_encode %{ 4568 int opcode = this->ideal_Opcode(); 4569 int vlen = vector_length(this, $src); 4570 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4571 %} 4572 ins_pipe( pipe_slow ); 4573 %} 4574 4575 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4576 predicate(vector_length(n->in(2)) == 8); // src 4577 match(Set dst (AddReductionVD dst src)); 4578 match(Set dst (MulReductionVD dst src)); 4579 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4580 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4581 ins_encode %{ 4582 int opcode = this->ideal_Opcode(); 4583 int vlen = vector_length(this, $src); 4584 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4585 %} 4586 ins_pipe( pipe_slow ); 4587 %} 4588 4589 // =======================Byte Reduction========================================== 4590 4591 #ifdef _LP64 4592 instruct reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4593 predicate(vector_element_basic_type(n->in(2)) == T_BYTE && 4594 vector_length(n->in(2)) <= 32); // src2 4595 match(Set dst (AddReductionVI src1 src2)); 4596 match(Set dst (AndReductionV src1 src2)); 4597 match(Set dst ( OrReductionV src1 src2)); 4598 match(Set dst (XorReductionV src1 src2)); 4599 match(Set dst (MinReductionV src1 src2)); 4600 match(Set dst (MaxReductionV src1 src2)); 4601 effect(TEMP vtmp1, TEMP vtmp2); 4602 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4603 ins_encode %{ 4604 int opcode = this->ideal_Opcode(); 4605 int vlen = vector_length(this, $src2); 4606 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4607 %} 4608 ins_pipe( pipe_slow ); 4609 %} 4610 4611 instruct reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4612 predicate(vector_element_basic_type(n->in(2)) == T_BYTE && 4613 vector_length(n->in(2)) == 64); // src2 4614 match(Set dst (AddReductionVI src1 src2)); 4615 match(Set dst (AndReductionV src1 src2)); 4616 match(Set dst ( OrReductionV src1 src2)); 4617 match(Set dst (XorReductionV src1 src2)); 4618 match(Set dst (MinReductionV src1 src2)); 4619 match(Set dst (MaxReductionV src1 src2)); 4620 effect(TEMP vtmp1, TEMP vtmp2); 4621 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4622 ins_encode %{ 4623 int opcode = this->ideal_Opcode(); 4624 int vlen = vector_length(this, $src2); 4625 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4626 %} 4627 ins_pipe( pipe_slow ); 4628 %} 4629 #endif 4630 4631 // =======================Short Reduction========================================== 4632 4633 instruct reductionS(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4634 predicate(vector_element_basic_type(n->in(2)) == T_SHORT && 4635 vector_length(n->in(2)) <= 16); // src2 4636 match(Set dst (AddReductionVI src1 src2)); 4637 match(Set dst (MulReductionVI src1 src2)); 4638 match(Set dst (AndReductionV src1 src2)); 4639 match(Set dst ( OrReductionV src1 src2)); 4640 match(Set dst (XorReductionV src1 src2)); 4641 match(Set dst (MinReductionV src1 src2)); 4642 match(Set dst (MaxReductionV src1 src2)); 4643 effect(TEMP vtmp1, TEMP vtmp2); 4644 format %{ "vector_reduction_short 
$dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4645 ins_encode %{ 4646 int opcode = this->ideal_Opcode(); 4647 int vlen = vector_length(this, $src2); 4648 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4649 %} 4650 ins_pipe( pipe_slow ); 4651 %} 4652 4653 instruct reduction32S(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4654 predicate(vector_element_basic_type(n->in(2)) == T_SHORT && 4655 vector_length(n->in(2)) == 32); // src2 4656 match(Set dst (AddReductionVI src1 src2)); 4657 match(Set dst (MulReductionVI src1 src2)); 4658 match(Set dst (AndReductionV src1 src2)); 4659 match(Set dst ( OrReductionV src1 src2)); 4660 match(Set dst (XorReductionV src1 src2)); 4661 match(Set dst (MinReductionV src1 src2)); 4662 match(Set dst (MaxReductionV src1 src2)); 4663 effect(TEMP vtmp1, TEMP vtmp2); 4664 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4665 ins_encode %{ 4666 int opcode = this->ideal_Opcode(); 4667 int vlen = vector_length(this, $src2); 4668 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4669 %} 4670 ins_pipe( pipe_slow ); 4671 %} 4672 4673 // =======================Mul Reduction========================================== 4674 4675 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4676 predicate(vector_element_basic_type(n->in(2)) == T_BYTE && 4677 vector_length(n->in(2)) <= 32); // src2 4678 match(Set dst (MulReductionVI src1 src2)); 4679 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4680 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4681 ins_encode %{ 4682 int opcode = this->ideal_Opcode(); 4683 int vlen = vector_length(this, $src2); 4684 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4685 %} 4686 ins_pipe( pipe_slow ); 4687 %} 4688 4689 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4690 predicate(vector_element_basic_type(n->in(2)) == T_BYTE && 4691 vector_length(n->in(2)) == 64); // src2 4692 match(Set dst (MulReductionVI src1 src2)); 4693 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4694 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4695 ins_encode %{ 4696 int opcode = this->ideal_Opcode(); 4697 int vlen = vector_length(this, $src2); 4698 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4699 %} 4700 ins_pipe( pipe_slow ); 4701 %} 4702 4703 //--------------------Min/Max Float Reduction -------------------- 4704 // Float Min Reduction 4705 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 4706 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4707 predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && 4708 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4709 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4710 vector_length(n->in(2)) == 2); 4711 match(Set dst (MinReductionV src1 src2)); 4712 match(Set dst (MaxReductionV src1 src2)); 4713 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4714 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4715 ins_encode %{ 4716 assert(UseAVX > 0, "sanity"); 
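// Java's Math.min/Math.max semantics for floats differ from raw x86 minps/maxps:
// minps/maxps return the second operand whenever either input is NaN and treat
// -0.0f as equal to +0.0f, whereas Java requires NaN to propagate and
// min(-0.0f, +0.0f) to yield -0.0f. The reduceFloatMinMax helper therefore uses
// the extra TEMPs ($atmp, $btmp, $xmm_1) for the compare/blend sequence that
// implements those rules. The 'false' argument below appears to indicate that
// $dst carries no prior partial result here (the identity value comes from the
// POS_INF/NEG_INF constant matched as $src1); the *_av variants further down in
// this section pass 'true' because there $dst is also an input.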
4717 4718 int opcode = this->ideal_Opcode(); 4719 int vlen = vector_length(this, $src2); 4720 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4721 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 4722 %} 4723 ins_pipe( pipe_slow ); 4724 %} 4725 4726 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 4727 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 4728 predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && 4729 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4730 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4731 vector_length(n->in(2)) >= 4); 4732 match(Set dst (MinReductionV src1 src2)); 4733 match(Set dst (MaxReductionV src1 src2)); 4734 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 4735 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 4736 ins_encode %{ 4737 assert(UseAVX > 0, "sanity"); 4738 4739 int opcode = this->ideal_Opcode(); 4740 int vlen = vector_length(this, $src2); 4741 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4742 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 4743 %} 4744 ins_pipe( pipe_slow ); 4745 %} 4746 4747 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 4748 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4749 predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && 4750 vector_length(n->in(2)) == 2); 4751 match(Set dst (MinReductionV dst src)); 4752 match(Set dst (MaxReductionV dst src)); 4753 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4754 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4755 ins_encode %{ 4756 assert(UseAVX > 0, "sanity"); 4757 4758 int opcode = this->ideal_Opcode(); 4759 int vlen = vector_length(this, $src); 4760 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 4761 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 4762 %} 4763 ins_pipe( pipe_slow ); 4764 %} 4765 4766 4767 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 4768 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 4769 predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && 4770 vector_length(n->in(2)) >= 4); 4771 match(Set dst (MinReductionV dst src)); 4772 match(Set dst (MaxReductionV dst src)); 4773 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 4774 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 4775 ins_encode %{ 4776 assert(UseAVX > 0, "sanity"); 4777 4778 int opcode = this->ideal_Opcode(); 4779 int vlen = vector_length(this, $src); 4780 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 4781 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 4782 %} 4783 ins_pipe( pipe_slow ); 4784 %} 4785 4786 4787 //--------------------Min Double Reduction -------------------- 4788 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 4789 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 4790 rFlagsReg cr) %{ 4791 predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && 4792 ((n->Opcode() == Op_MinReductionV && 
n->in(1)->bottom_type() == TypeD::POS_INF) || 4793 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 4794 vector_length(n->in(2)) == 2); 4795 match(Set dst (MinReductionV src1 src2)); 4796 match(Set dst (MaxReductionV src1 src2)); 4797 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 4798 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 4799 ins_encode %{ 4800 assert(UseAVX > 0, "sanity"); 4801 4802 int opcode = this->ideal_Opcode(); 4803 int vlen = vector_length(this, $src2); 4804 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 4805 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 4806 %} 4807 ins_pipe( pipe_slow ); 4808 %} 4809 4810 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 4811 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 4812 rFlagsReg cr) %{ 4813 predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && 4814 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 4815 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 4816 vector_length(n->in(2)) >= 4); 4817 match(Set dst (MinReductionV src1 src2)); 4818 match(Set dst (MaxReductionV src1 src2)); 4819 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 4820 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 4821 ins_encode %{ 4822 assert(UseAVX > 0, "sanity"); 4823 4824 int opcode = this->ideal_Opcode(); 4825 int vlen = vector_length(this, $src2); 4826 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 4827 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 4828 %} 4829 ins_pipe( pipe_slow ); 4830 %} 4831 4832 4833 instruct minmax_reduction2D_av(legRegD dst, legVec src, 4834 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 4835 rFlagsReg cr) %{ 4836 predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && 4837 vector_length(n->in(2)) == 2); 4838 match(Set dst (MinReductionV dst src)); 4839 match(Set dst (MaxReductionV dst src)); 4840 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 4841 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 4842 ins_encode %{ 4843 assert(UseAVX > 0, "sanity"); 4844 4845 int opcode = this->ideal_Opcode(); 4846 int vlen = vector_length(this, $src); 4847 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 4848 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 4849 %} 4850 ins_pipe( pipe_slow ); 4851 %} 4852 4853 instruct minmax_reductionD_av(legRegD dst, legVec src, 4854 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 4855 rFlagsReg cr) %{ 4856 predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && 4857 vector_length(n->in(2)) >= 4); 4858 match(Set dst (MinReductionV dst src)); 4859 match(Set dst (MaxReductionV dst src)); 4860 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 4861 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 4862 ins_encode %{ 4863 assert(UseAVX > 0, "sanity"); 4864 4865 int opcode = this->ideal_Opcode(); 4866 int vlen = vector_length(this, $src); 4867 __ reduceDoubleMinMax(opcode, vlen, true, 
$dst$$XMMRegister, $src$$XMMRegister, 4868 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 4869 %} 4870 ins_pipe( pipe_slow ); 4871 %} 4872 4873 // ====================VECTOR ARITHMETIC======================================= 4874 4875 // --------------------------------- ADD -------------------------------------- 4876 4877 // Bytes vector add 4878 instruct vaddB(vec dst, vec src) %{ 4879 predicate(UseAVX == 0); 4880 match(Set dst (AddVB dst src)); 4881 format %{ "paddb $dst,$src\t! add packedB" %} 4882 ins_encode %{ 4883 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 4884 %} 4885 ins_pipe( pipe_slow ); 4886 %} 4887 4888 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 4889 predicate(UseAVX > 0); 4890 match(Set dst (AddVB src1 src2)); 4891 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 4892 ins_encode %{ 4893 int vlen_enc = vector_length_encoding(this); 4894 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4895 %} 4896 ins_pipe( pipe_slow ); 4897 %} 4898 4899 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 4900 predicate(UseAVX > 0); 4901 match(Set dst (AddVB src (LoadVector mem))); 4902 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 4903 ins_encode %{ 4904 int vlen_enc = vector_length_encoding(this); 4905 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4906 %} 4907 ins_pipe( pipe_slow ); 4908 %} 4909 4910 // Shorts/Chars vector add 4911 instruct vaddS(vec dst, vec src) %{ 4912 predicate(UseAVX == 0); 4913 match(Set dst (AddVS dst src)); 4914 format %{ "paddw $dst,$src\t! add packedS" %} 4915 ins_encode %{ 4916 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 4917 %} 4918 ins_pipe( pipe_slow ); 4919 %} 4920 4921 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 4922 predicate(UseAVX > 0); 4923 match(Set dst (AddVS src1 src2)); 4924 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 4925 ins_encode %{ 4926 int vlen_enc = vector_length_encoding(this); 4927 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4928 %} 4929 ins_pipe( pipe_slow ); 4930 %} 4931 4932 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 4933 predicate(UseAVX > 0); 4934 match(Set dst (AddVS src (LoadVector mem))); 4935 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 4936 ins_encode %{ 4937 int vlen_enc = vector_length_encoding(this); 4938 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4939 %} 4940 ins_pipe( pipe_slow ); 4941 %} 4942 4943 // Integers vector add 4944 instruct vaddI(vec dst, vec src) %{ 4945 predicate(UseAVX == 0); 4946 match(Set dst (AddVI dst src)); 4947 format %{ "paddd $dst,$src\t! add packedI" %} 4948 ins_encode %{ 4949 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 4950 %} 4951 ins_pipe( pipe_slow ); 4952 %} 4953 4954 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 4955 predicate(UseAVX > 0); 4956 match(Set dst (AddVI src1 src2)); 4957 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 4958 ins_encode %{ 4959 int vlen_enc = vector_length_encoding(this); 4960 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4961 %} 4962 ins_pipe( pipe_slow ); 4963 %} 4964 4965 4966 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 4967 predicate(UseAVX > 0); 4968 match(Set dst (AddVI src (LoadVector mem))); 4969 format %{ "vpaddd $dst,$src,$mem\t! 
add packedI" %} 4970 ins_encode %{ 4971 int vlen_enc = vector_length_encoding(this); 4972 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 4973 %} 4974 ins_pipe( pipe_slow ); 4975 %} 4976 4977 // Longs vector add 4978 instruct vaddL(vec dst, vec src) %{ 4979 predicate(UseAVX == 0); 4980 match(Set dst (AddVL dst src)); 4981 format %{ "paddq $dst,$src\t! add packedL" %} 4982 ins_encode %{ 4983 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 4984 %} 4985 ins_pipe( pipe_slow ); 4986 %} 4987 4988 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 4989 predicate(UseAVX > 0); 4990 match(Set dst (AddVL src1 src2)); 4991 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 4992 ins_encode %{ 4993 int vlen_enc = vector_length_encoding(this); 4994 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 4995 %} 4996 ins_pipe( pipe_slow ); 4997 %} 4998 4999 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5000 predicate(UseAVX > 0); 5001 match(Set dst (AddVL src (LoadVector mem))); 5002 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5003 ins_encode %{ 5004 int vlen_enc = vector_length_encoding(this); 5005 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5006 %} 5007 ins_pipe( pipe_slow ); 5008 %} 5009 5010 // Floats vector add 5011 instruct vaddF(vec dst, vec src) %{ 5012 predicate(UseAVX == 0); 5013 match(Set dst (AddVF dst src)); 5014 format %{ "addps $dst,$src\t! add packedF" %} 5015 ins_encode %{ 5016 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5017 %} 5018 ins_pipe( pipe_slow ); 5019 %} 5020 5021 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5022 predicate(UseAVX > 0); 5023 match(Set dst (AddVF src1 src2)); 5024 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5025 ins_encode %{ 5026 int vlen_enc = vector_length_encoding(this); 5027 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5028 %} 5029 ins_pipe( pipe_slow ); 5030 %} 5031 5032 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5033 predicate(UseAVX > 0); 5034 match(Set dst (AddVF src (LoadVector mem))); 5035 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5036 ins_encode %{ 5037 int vlen_enc = vector_length_encoding(this); 5038 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5039 %} 5040 ins_pipe( pipe_slow ); 5041 %} 5042 5043 // Doubles vector add 5044 instruct vaddD(vec dst, vec src) %{ 5045 predicate(UseAVX == 0); 5046 match(Set dst (AddVD dst src)); 5047 format %{ "addpd $dst,$src\t! add packedD" %} 5048 ins_encode %{ 5049 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5050 %} 5051 ins_pipe( pipe_slow ); 5052 %} 5053 5054 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5055 predicate(UseAVX > 0); 5056 match(Set dst (AddVD src1 src2)); 5057 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5058 ins_encode %{ 5059 int vlen_enc = vector_length_encoding(this); 5060 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5061 %} 5062 ins_pipe( pipe_slow ); 5063 %} 5064 5065 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5066 predicate(UseAVX > 0); 5067 match(Set dst (AddVD src (LoadVector mem))); 5068 format %{ "vaddpd $dst,$src,$mem\t! 
add packedD" %} 5069 ins_encode %{ 5070 int vlen_enc = vector_length_encoding(this); 5071 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5072 %} 5073 ins_pipe( pipe_slow ); 5074 %} 5075 5076 // --------------------------------- SUB -------------------------------------- 5077 5078 // Bytes vector sub 5079 instruct vsubB(vec dst, vec src) %{ 5080 predicate(UseAVX == 0); 5081 match(Set dst (SubVB dst src)); 5082 format %{ "psubb $dst,$src\t! sub packedB" %} 5083 ins_encode %{ 5084 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5085 %} 5086 ins_pipe( pipe_slow ); 5087 %} 5088 5089 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5090 predicate(UseAVX > 0); 5091 match(Set dst (SubVB src1 src2)); 5092 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5093 ins_encode %{ 5094 int vlen_enc = vector_length_encoding(this); 5095 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5096 %} 5097 ins_pipe( pipe_slow ); 5098 %} 5099 5100 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5101 predicate(UseAVX > 0); 5102 match(Set dst (SubVB src (LoadVector mem))); 5103 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5104 ins_encode %{ 5105 int vlen_enc = vector_length_encoding(this); 5106 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5107 %} 5108 ins_pipe( pipe_slow ); 5109 %} 5110 5111 // Shorts/Chars vector sub 5112 instruct vsubS(vec dst, vec src) %{ 5113 predicate(UseAVX == 0); 5114 match(Set dst (SubVS dst src)); 5115 format %{ "psubw $dst,$src\t! sub packedS" %} 5116 ins_encode %{ 5117 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5118 %} 5119 ins_pipe( pipe_slow ); 5120 %} 5121 5122 5123 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5124 predicate(UseAVX > 0); 5125 match(Set dst (SubVS src1 src2)); 5126 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5127 ins_encode %{ 5128 int vlen_enc = vector_length_encoding(this); 5129 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5130 %} 5131 ins_pipe( pipe_slow ); 5132 %} 5133 5134 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5135 predicate(UseAVX > 0); 5136 match(Set dst (SubVS src (LoadVector mem))); 5137 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5138 ins_encode %{ 5139 int vlen_enc = vector_length_encoding(this); 5140 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5141 %} 5142 ins_pipe( pipe_slow ); 5143 %} 5144 5145 // Integers vector sub 5146 instruct vsubI(vec dst, vec src) %{ 5147 predicate(UseAVX == 0); 5148 match(Set dst (SubVI dst src)); 5149 format %{ "psubd $dst,$src\t! sub packedI" %} 5150 ins_encode %{ 5151 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5152 %} 5153 ins_pipe( pipe_slow ); 5154 %} 5155 5156 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5157 predicate(UseAVX > 0); 5158 match(Set dst (SubVI src1 src2)); 5159 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5160 ins_encode %{ 5161 int vlen_enc = vector_length_encoding(this); 5162 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5163 %} 5164 ins_pipe( pipe_slow ); 5165 %} 5166 5167 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5168 predicate(UseAVX > 0); 5169 match(Set dst (SubVI src (LoadVector mem))); 5170 format %{ "vpsubd $dst,$src,$mem\t! 
sub packedI" %} 5171 ins_encode %{ 5172 int vlen_enc = vector_length_encoding(this); 5173 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5174 %} 5175 ins_pipe( pipe_slow ); 5176 %} 5177 5178 // Longs vector sub 5179 instruct vsubL(vec dst, vec src) %{ 5180 predicate(UseAVX == 0); 5181 match(Set dst (SubVL dst src)); 5182 format %{ "psubq $dst,$src\t! sub packedL" %} 5183 ins_encode %{ 5184 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5185 %} 5186 ins_pipe( pipe_slow ); 5187 %} 5188 5189 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5190 predicate(UseAVX > 0); 5191 match(Set dst (SubVL src1 src2)); 5192 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 5193 ins_encode %{ 5194 int vlen_enc = vector_length_encoding(this); 5195 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5196 %} 5197 ins_pipe( pipe_slow ); 5198 %} 5199 5200 5201 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5202 predicate(UseAVX > 0); 5203 match(Set dst (SubVL src (LoadVector mem))); 5204 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5205 ins_encode %{ 5206 int vlen_enc = vector_length_encoding(this); 5207 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5208 %} 5209 ins_pipe( pipe_slow ); 5210 %} 5211 5212 // Floats vector sub 5213 instruct vsubF(vec dst, vec src) %{ 5214 predicate(UseAVX == 0); 5215 match(Set dst (SubVF dst src)); 5216 format %{ "subps $dst,$src\t! sub packedF" %} 5217 ins_encode %{ 5218 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5219 %} 5220 ins_pipe( pipe_slow ); 5221 %} 5222 5223 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5224 predicate(UseAVX > 0); 5225 match(Set dst (SubVF src1 src2)); 5226 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5227 ins_encode %{ 5228 int vlen_enc = vector_length_encoding(this); 5229 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5230 %} 5231 ins_pipe( pipe_slow ); 5232 %} 5233 5234 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5235 predicate(UseAVX > 0); 5236 match(Set dst (SubVF src (LoadVector mem))); 5237 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5238 ins_encode %{ 5239 int vlen_enc = vector_length_encoding(this); 5240 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5241 %} 5242 ins_pipe( pipe_slow ); 5243 %} 5244 5245 // Doubles vector sub 5246 instruct vsubD(vec dst, vec src) %{ 5247 predicate(UseAVX == 0); 5248 match(Set dst (SubVD dst src)); 5249 format %{ "subpd $dst,$src\t! sub packedD" %} 5250 ins_encode %{ 5251 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5252 %} 5253 ins_pipe( pipe_slow ); 5254 %} 5255 5256 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5257 predicate(UseAVX > 0); 5258 match(Set dst (SubVD src1 src2)); 5259 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5260 ins_encode %{ 5261 int vlen_enc = vector_length_encoding(this); 5262 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5263 %} 5264 ins_pipe( pipe_slow ); 5265 %} 5266 5267 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5268 predicate(UseAVX > 0); 5269 match(Set dst (SubVD src (LoadVector mem))); 5270 format %{ "vsubpd $dst,$src,$mem\t! 
sub packedD" %} 5271 ins_encode %{ 5272 int vlen_enc = vector_length_encoding(this); 5273 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5274 %} 5275 ins_pipe( pipe_slow ); 5276 %} 5277 5278 // --------------------------------- MUL -------------------------------------- 5279 5280 // Byte vector mul 5281 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5282 predicate(vector_length(n) == 4 || 5283 vector_length(n) == 8); 5284 match(Set dst (MulVB src1 src2)); 5285 effect(TEMP dst, TEMP tmp, TEMP scratch); 5286 format %{"vector_mulB $dst,$src1,$src2" %} 5287 ins_encode %{ 5288 assert(UseSSE > 3, "required"); 5289 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 5290 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 5291 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 5292 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5293 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 5294 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5295 %} 5296 ins_pipe( pipe_slow ); 5297 %} 5298 5299 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5300 predicate(vector_length(n) == 16 && UseAVX <= 1); 5301 match(Set dst (MulVB src1 src2)); 5302 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5303 format %{"vector_mulB $dst,$src1,$src2" %} 5304 ins_encode %{ 5305 assert(UseSSE > 3, "required"); 5306 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 5307 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 5308 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 5309 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 5310 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 5311 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 5312 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 5313 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 5314 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5315 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 5316 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 5317 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 5318 %} 5319 ins_pipe( pipe_slow ); 5320 %} 5321 5322 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5323 predicate(vector_length(n) == 16 && UseAVX > 1); 5324 match(Set dst (MulVB src1 src2)); 5325 effect(TEMP dst, TEMP tmp, TEMP scratch); 5326 format %{"vector_mulB $dst,$src1,$src2" %} 5327 ins_encode %{ 5328 int vlen_enc = Assembler::AVX_256bit; 5329 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5330 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5331 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5332 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5333 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5334 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 5335 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 5336 %} 5337 ins_pipe( pipe_slow ); 5338 %} 5339 5340 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5341 predicate(vector_length(n) == 32); 5342 match(Set dst (MulVB src1 src2)); 5343 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5344 format %{"vector_mulB $dst,$src1,$src2" %} 5345 ins_encode %{ 5346 assert(UseAVX > 1, "required"); 5347 int vlen_enc = Assembler::AVX_256bit; 5348 __ 
vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5349 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 5350 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5351 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5352 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5353 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5354 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5355 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5356 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5357 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5358 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5359 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5360 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5361 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 5362 %} 5363 ins_pipe( pipe_slow ); 5364 %} 5365 5366 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5367 predicate(vector_length(n) == 64); 5368 match(Set dst (MulVB src1 src2)); 5369 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5370 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 5371 ins_encode %{ 5372 assert(UseAVX > 2, "required"); 5373 int vlen_enc = Assembler::AVX_512bit; 5374 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5375 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 5376 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5377 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5378 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5379 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5380 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5381 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5382 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5383 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5384 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5385 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5386 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5387 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); 5388 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5389 %} 5390 ins_pipe( pipe_slow ); 5391 %} 5392 5393 // Shorts/Chars vector mul 5394 instruct vmulS(vec dst, vec src) %{ 5395 predicate(UseAVX == 0); 5396 match(Set dst (MulVS dst src)); 5397 format %{ "pmullw $dst,$src\t! mul packedS" %} 5398 ins_encode %{ 5399 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5400 %} 5401 ins_pipe( pipe_slow ); 5402 %} 5403 5404 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5405 predicate(UseAVX > 0); 5406 match(Set dst (MulVS src1 src2)); 5407 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packedS" %} 5408 ins_encode %{ 5409 int vlen_enc = vector_length_encoding(this); 5410 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5411 %} 5412 ins_pipe( pipe_slow ); 5413 %} 5414 5415 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5416 predicate(UseAVX > 0); 5417 match(Set dst (MulVS src (LoadVector mem))); 5418 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 5419 ins_encode %{ 5420 int vlen_enc = vector_length_encoding(this); 5421 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5422 %} 5423 ins_pipe( pipe_slow ); 5424 %} 5425 5426 // Integers vector mul 5427 instruct vmulI(vec dst, vec src) %{ 5428 predicate(UseAVX == 0); 5429 match(Set dst (MulVI dst src)); 5430 format %{ "pmulld $dst,$src\t! mul packedI" %} 5431 ins_encode %{ 5432 assert(UseSSE > 3, "required"); 5433 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5434 %} 5435 ins_pipe( pipe_slow ); 5436 %} 5437 5438 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5439 predicate(UseAVX > 0); 5440 match(Set dst (MulVI src1 src2)); 5441 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 5442 ins_encode %{ 5443 int vlen_enc = vector_length_encoding(this); 5444 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5445 %} 5446 ins_pipe( pipe_slow ); 5447 %} 5448 5449 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 5450 predicate(UseAVX > 0); 5451 match(Set dst (MulVI src (LoadVector mem))); 5452 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 5453 ins_encode %{ 5454 int vlen_enc = vector_length_encoding(this); 5455 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5456 %} 5457 ins_pipe( pipe_slow ); 5458 %} 5459 5460 // Longs vector mul 5461 instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 5462 predicate(VM_Version::supports_avx512dq()); 5463 match(Set dst (MulVL src1 src2)); 5464 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 5465 ins_encode %{ 5466 assert(UseAVX > 2, "required"); 5467 int vlen_enc = vector_length_encoding(this); 5468 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5469 %} 5470 ins_pipe( pipe_slow ); 5471 %} 5472 5473 instruct vmulL_mem(vec dst, vec src, memory mem) %{ 5474 predicate(VM_Version::supports_avx512dq()); 5475 match(Set dst (MulVL src (LoadVector mem))); 5476 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 5477 ins_encode %{ 5478 assert(UseAVX > 2, "required"); 5479 int vlen_enc = vector_length_encoding(this); 5480 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5481 %} 5482 ins_pipe( pipe_slow ); 5483 %} 5484 5485 instruct mul2L_reg(vec dst, vec src2, vec tmp) %{ 5486 predicate(vector_length(n) == 2 && !VM_Version::supports_avx512dq()); 5487 match(Set dst (MulVL dst src2)); 5488 effect(TEMP dst, TEMP tmp); 5489 format %{ "pshufd $tmp,$src2, 177\n\t" 5490 "pmulld $tmp,$dst\n\t" 5491 "phaddd $tmp,$tmp\n\t" 5492 "pmovzxdq $tmp,$tmp\n\t" 5493 "psllq $tmp, 32\n\t" 5494 "pmuludq $dst,$src2\n\t" 5495 "paddq $dst,$tmp\n\t! 
mul packed2L" %} 5496 5497 ins_encode %{ 5498 assert(VM_Version::supports_sse4_1(), "required"); 5499 int vlen_enc = Assembler::AVX_128bit; 5500 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); 5501 __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); 5502 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5503 __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); 5504 __ psllq($tmp$$XMMRegister, 32); 5505 __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); 5506 __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); 5507 %} 5508 ins_pipe( pipe_slow ); 5509 %} 5510 5511 instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, vec tmp, vec tmp1) %{ 5512 predicate(vector_length(n) == 4 && !VM_Version::supports_avx512dq()); 5513 match(Set dst (MulVL src1 src2)); 5514 effect(TEMP tmp1, TEMP tmp); 5515 format %{ "vpshufd $tmp,$src2\n\t" 5516 "vpmulld $tmp,$src1,$tmp\n\t" 5517 "vphaddd $tmp,$tmp,$tmp\n\t" 5518 "vpmovzxdq $tmp,$tmp\n\t" 5519 "vpsllq $tmp,$tmp\n\t" 5520 "vpmuludq $tmp1,$src1,$src2\n\t" 5521 "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %} 5522 ins_encode %{ 5523 int vlen_enc = Assembler::AVX_256bit; 5524 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc); 5525 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5526 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 5527 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5528 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5529 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc); 5530 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5531 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5532 %} 5533 ins_pipe( pipe_slow ); 5534 %} 5535 5536 // Floats vector mul 5537 instruct vmulF(vec dst, vec src) %{ 5538 predicate(UseAVX == 0); 5539 match(Set dst (MulVF dst src)); 5540 format %{ "mulps $dst,$src\t! mul packedF" %} 5541 ins_encode %{ 5542 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 5543 %} 5544 ins_pipe( pipe_slow ); 5545 %} 5546 5547 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 5548 predicate(UseAVX > 0); 5549 match(Set dst (MulVF src1 src2)); 5550 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 5551 ins_encode %{ 5552 int vlen_enc = vector_length_encoding(this); 5553 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5554 %} 5555 ins_pipe( pipe_slow ); 5556 %} 5557 5558 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 5559 predicate(UseAVX > 0); 5560 match(Set dst (MulVF src (LoadVector mem))); 5561 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 5562 ins_encode %{ 5563 int vlen_enc = vector_length_encoding(this); 5564 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5565 %} 5566 ins_pipe( pipe_slow ); 5567 %} 5568 5569 // Doubles vector mul 5570 instruct vmulD(vec dst, vec src) %{ 5571 predicate(UseAVX == 0); 5572 match(Set dst (MulVD dst src)); 5573 format %{ "mulpd $dst,$src\t! mul packedD" %} 5574 ins_encode %{ 5575 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 5576 %} 5577 ins_pipe( pipe_slow ); 5578 %} 5579 5580 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 5581 predicate(UseAVX > 0); 5582 match(Set dst (MulVD src1 src2)); 5583 format %{ "vmulpd $dst,$src1,$src2\t! 
mul packedD" %} 5584 ins_encode %{ 5585 int vlen_enc = vector_length_encoding(this); 5586 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5587 %} 5588 ins_pipe( pipe_slow ); 5589 %} 5590 5591 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 5592 predicate(UseAVX > 0); 5593 match(Set dst (MulVD src (LoadVector mem))); 5594 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 5595 ins_encode %{ 5596 int vlen_enc = vector_length_encoding(this); 5597 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5598 %} 5599 ins_pipe( pipe_slow ); 5600 %} 5601 5602 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5603 predicate(vector_length(n) == 8); 5604 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 5605 effect(TEMP dst, USE src1, USE src2); 5606 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 5607 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 5608 %} 5609 ins_encode %{ 5610 assert(UseAVX > 0, "required"); 5611 5612 int vlen_enc = Assembler::AVX_256bit; 5613 int cond = (Assembler::Condition)($copnd$$cmpcode); 5614 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5615 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5616 %} 5617 ins_pipe( pipe_slow ); 5618 %} 5619 5620 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5621 predicate(vector_length(n) == 4); 5622 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 5623 effect(TEMP dst, USE src1, USE src2); 5624 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 5625 "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 5626 %} 5627 ins_encode %{ 5628 assert(UseAVX > 0, "required"); 5629 5630 int vlen_enc = Assembler::AVX_256bit; 5631 int cond = (Assembler::Condition)($copnd$$cmpcode); 5632 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5633 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5634 %} 5635 ins_pipe( pipe_slow ); 5636 %} 5637 5638 // --------------------------------- DIV -------------------------------------- 5639 5640 // Floats vector div 5641 instruct vdivF(vec dst, vec src) %{ 5642 predicate(UseAVX == 0); 5643 match(Set dst (DivVF dst src)); 5644 format %{ "divps $dst,$src\t! div packedF" %} 5645 ins_encode %{ 5646 __ divps($dst$$XMMRegister, $src$$XMMRegister); 5647 %} 5648 ins_pipe( pipe_slow ); 5649 %} 5650 5651 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 5652 predicate(UseAVX > 0); 5653 match(Set dst (DivVF src1 src2)); 5654 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 5655 ins_encode %{ 5656 int vlen_enc = vector_length_encoding(this); 5657 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5658 %} 5659 ins_pipe( pipe_slow ); 5660 %} 5661 5662 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 5663 predicate(UseAVX > 0); 5664 match(Set dst (DivVF src (LoadVector mem))); 5665 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 5666 ins_encode %{ 5667 int vlen_enc = vector_length_encoding(this); 5668 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5669 %} 5670 ins_pipe( pipe_slow ); 5671 %} 5672 5673 // Doubles vector div 5674 instruct vdivD(vec dst, vec src) %{ 5675 predicate(UseAVX == 0); 5676 match(Set dst (DivVD dst src)); 5677 format %{ "divpd $dst,$src\t! 
div packedD" %} 5678 ins_encode %{ 5679 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 5680 %} 5681 ins_pipe( pipe_slow ); 5682 %} 5683 5684 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 5685 predicate(UseAVX > 0); 5686 match(Set dst (DivVD src1 src2)); 5687 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 5688 ins_encode %{ 5689 int vlen_enc = vector_length_encoding(this); 5690 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5691 %} 5692 ins_pipe( pipe_slow ); 5693 %} 5694 5695 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 5696 predicate(UseAVX > 0); 5697 match(Set dst (DivVD src (LoadVector mem))); 5698 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 5699 ins_encode %{ 5700 int vlen_enc = vector_length_encoding(this); 5701 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5702 %} 5703 ins_pipe( pipe_slow ); 5704 %} 5705 5706 // ------------------------------ MinMax --------------------------------------- 5707 5708 // Byte, Short, Int vector Min/Max 5709 instruct minmax_reg_sse(vec dst, vec src) %{ 5710 predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5711 UseAVX == 0); 5712 match(Set dst (MinV dst src)); 5713 match(Set dst (MaxV dst src)); 5714 format %{ "vector_minmax $dst,$src\t! " %} 5715 ins_encode %{ 5716 assert(UseSSE >= 4, "required"); 5717 5718 int opcode = this->ideal_Opcode(); 5719 BasicType elem_bt = vector_element_basic_type(this); 5720 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 5721 %} 5722 ins_pipe( pipe_slow ); 5723 %} 5724 5725 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 5726 predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5727 UseAVX > 0); 5728 match(Set dst (MinV src1 src2)); 5729 match(Set dst (MaxV src1 src2)); 5730 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 5731 ins_encode %{ 5732 int opcode = this->ideal_Opcode(); 5733 int vlen_enc = vector_length_encoding(this); 5734 BasicType elem_bt = vector_element_basic_type(this); 5735 5736 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5737 %} 5738 ins_pipe( pipe_slow ); 5739 %} 5740 5741 // Long vector Min/Max 5742 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 5743 predicate(vector_length_in_bytes(n) == 16 && vector_element_basic_type(n) == T_LONG && 5744 UseAVX == 0); 5745 match(Set dst (MinV dst src)); 5746 match(Set dst (MaxV src dst)); 5747 effect(TEMP dst, TEMP tmp); 5748 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 5749 ins_encode %{ 5750 assert(UseSSE >= 4, "required"); 5751 5752 int opcode = this->ideal_Opcode(); 5753 BasicType elem_bt = vector_element_basic_type(this); 5754 assert(elem_bt == T_LONG, "sanity"); 5755 5756 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 5757 %} 5758 ins_pipe( pipe_slow ); 5759 %} 5760 5761 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 5762 predicate(vector_length_in_bytes(n) <= 32 && vector_element_basic_type(n) == T_LONG && 5763 UseAVX > 0 && !VM_Version::supports_avx512vl()); 5764 match(Set dst (MinV src1 src2)); 5765 match(Set dst (MaxV src1 src2)); 5766 effect(TEMP dst); 5767 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 5768 ins_encode %{ 5769 int vlen_enc = vector_length_encoding(this); 5770 int opcode = this->ideal_Opcode(); 5771 BasicType elem_bt = vector_element_basic_type(this); 5772 assert(elem_bt == T_LONG, "sanity"); 5773 5774 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5775 %} 5776 ins_pipe( pipe_slow ); 5777 %} 5778 5779 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 5780 predicate((vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 5781 vector_element_basic_type(n) == T_LONG); 5782 match(Set dst (MinV src1 src2)); 5783 match(Set dst (MaxV src1 src2)); 5784 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 5785 ins_encode %{ 5786 assert(UseAVX > 2, "required"); 5787 5788 int vlen_enc = vector_length_encoding(this); 5789 int opcode = this->ideal_Opcode(); 5790 BasicType elem_bt = vector_element_basic_type(this); 5791 assert(elem_bt == T_LONG, "sanity"); 5792 5793 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5794 %} 5795 ins_pipe( pipe_slow ); 5796 %} 5797 5798 // Float/Double vector Min/Max 5799 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 5800 predicate(vector_length_in_bytes(n) <= 32 && 5801 is_floating_point_type(vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 5802 UseAVX > 0); 5803 match(Set dst (MinV a b)); 5804 match(Set dst (MaxV a b)); 5805 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 5806 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 5807 ins_encode %{ 5808 assert(UseAVX > 0, "required"); 5809 5810 int opcode = this->ideal_Opcode(); 5811 int vlen_enc = vector_length_encoding(this); 5812 BasicType elem_bt = vector_element_basic_type(this); 5813 5814 __ vminmax_fp(opcode, elem_bt, 5815 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 5816 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 5817 %} 5818 ins_pipe( pipe_slow ); 5819 %} 5820 5821 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{ 5822 predicate(vector_length_in_bytes(n) == 64 && 5823 is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 5824 match(Set dst (MinV a b)); 5825 match(Set dst (MaxV a b)); 5826 effect(USE a, USE b, TEMP atmp, TEMP btmp); 5827 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 5828 ins_encode %{ 5829 assert(UseAVX > 2, "required"); 5830 5831 int opcode = this->ideal_Opcode(); 5832 int vlen_enc = vector_length_encoding(this); 5833 BasicType elem_bt = vector_element_basic_type(this); 5834 5835 KRegister ktmp = k1; 5836 __ evminmax_fp(opcode, elem_bt, 5837 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 5838 ktmp, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 5839 %} 5840 ins_pipe( pipe_slow ); 5841 %} 5842 5843 // --------------------------------- Sqrt -------------------------------------- 5844 5845 instruct vsqrtF_reg(vec dst, vec src) %{ 5846 match(Set dst (SqrtVF src)); 5847 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 5848 ins_encode %{ 5849 assert(UseAVX > 0, "required"); 5850 int vlen_enc = vector_length_encoding(this); 5851 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5852 %} 5853 ins_pipe( pipe_slow ); 5854 %} 5855 5856 instruct vsqrtF_mem(vec dst, memory mem) %{ 5857 match(Set dst (SqrtVF (LoadVector mem))); 5858 format %{ "vsqrtps $dst,$mem\t! 
sqrt packedF" %} 5859 ins_encode %{ 5860 assert(UseAVX > 0, "required"); 5861 int vlen_enc = vector_length_encoding(this); 5862 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 5863 %} 5864 ins_pipe( pipe_slow ); 5865 %} 5866 5867 // Floating point vector sqrt 5868 instruct vsqrtD_reg(vec dst, vec src) %{ 5869 match(Set dst (SqrtVD src)); 5870 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 5871 ins_encode %{ 5872 assert(UseAVX > 0, "required"); 5873 int vlen_enc = vector_length_encoding(this); 5874 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5875 %} 5876 ins_pipe( pipe_slow ); 5877 %} 5878 5879 instruct vsqrtD_mem(vec dst, memory mem) %{ 5880 match(Set dst (SqrtVD (LoadVector mem))); 5881 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 5882 ins_encode %{ 5883 assert(UseAVX > 0, "required"); 5884 int vlen_enc = vector_length_encoding(this); 5885 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 5886 %} 5887 ins_pipe( pipe_slow ); 5888 %} 5889 5890 // ------------------------------ Shift --------------------------------------- 5891 5892 // Left and right shift count vectors are the same on x86 5893 // (only lowest bits of xmm reg are used for count). 5894 instruct vshiftcnt(vec dst, rRegI cnt) %{ 5895 match(Set dst (LShiftCntV cnt)); 5896 match(Set dst (RShiftCntV cnt)); 5897 format %{ "movdl $dst,$cnt\t! load shift count" %} 5898 ins_encode %{ 5899 __ movdl($dst$$XMMRegister, $cnt$$Register); 5900 %} 5901 ins_pipe( pipe_slow ); 5902 %} 5903 5904 // Byte vector shift 5905 instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5906 predicate(vector_length(n) <= 8 && VectorNode::is_vshift_cnt(n->in(2))); 5907 match(Set dst ( LShiftVB src shift)); 5908 match(Set dst ( RShiftVB src shift)); 5909 match(Set dst (URShiftVB src shift)); 5910 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 5911 format %{"vector_byte_shift $dst,$src,$shift" %} 5912 ins_encode %{ 5913 assert(UseSSE > 3, "required"); 5914 int opcode = this->ideal_Opcode(); 5915 bool sign = (opcode != Op_URShiftVB); 5916 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 5917 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 5918 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5919 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 5920 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5921 %} 5922 ins_pipe( pipe_slow ); 5923 %} 5924 5925 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 5926 predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && 5927 UseAVX <= 1); 5928 match(Set dst ( LShiftVB src shift)); 5929 match(Set dst ( RShiftVB src shift)); 5930 match(Set dst (URShiftVB src shift)); 5931 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 5932 format %{"vector_byte_shift $dst,$src,$shift" %} 5933 ins_encode %{ 5934 assert(UseSSE > 3, "required"); 5935 int opcode = this->ideal_Opcode(); 5936 bool sign = (opcode != Op_URShiftVB); 5937 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 5938 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 5939 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 5940 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 5941 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 5942 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5943 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 5944 __ pand($dst$$XMMRegister, 
$tmp1$$XMMRegister); 5945 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 5946 %} 5947 ins_pipe( pipe_slow ); 5948 %} 5949 5950 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5951 predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && 5952 UseAVX > 1); 5953 match(Set dst ( LShiftVB src shift)); 5954 match(Set dst ( RShiftVB src shift)); 5955 match(Set dst (URShiftVB src shift)); 5956 effect(TEMP dst, TEMP tmp, TEMP scratch); 5957 format %{"vector_byte_shift $dst,$src,$shift" %} 5958 ins_encode %{ 5959 int opcode = this->ideal_Opcode(); 5960 bool sign = (opcode != Op_URShiftVB); 5961 int vlen_enc = Assembler::AVX_256bit; 5962 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 5963 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5964 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 5965 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 5966 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 5967 %} 5968 ins_pipe( pipe_slow ); 5969 %} 5970 5971 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5972 predicate(vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2))); 5973 match(Set dst ( LShiftVB src shift)); 5974 match(Set dst ( RShiftVB src shift)); 5975 match(Set dst (URShiftVB src shift)); 5976 effect(TEMP dst, TEMP tmp, TEMP scratch); 5977 format %{"vector_byte_shift $dst,$src,$shift" %} 5978 ins_encode %{ 5979 assert(UseAVX > 1, "required"); 5980 int opcode = this->ideal_Opcode(); 5981 bool sign = (opcode != Op_URShiftVB); 5982 int vlen_enc = Assembler::AVX_256bit; 5983 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 5984 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5985 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5986 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5987 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5988 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 5989 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 5990 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5991 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 5992 %} 5993 ins_pipe( pipe_slow ); 5994 %} 5995 5996 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 5997 predicate(vector_length(n) == 64 && VectorNode::is_vshift_cnt(n->in(2))); 5998 match(Set dst ( LShiftVB src shift)); 5999 match(Set dst (RShiftVB src shift)); 6000 match(Set dst (URShiftVB src shift)); 6001 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 6002 format %{"vector_byte_shift $dst,$src,$shift" %} 6003 ins_encode %{ 6004 assert(UseAVX > 2, "required"); 6005 int opcode = this->ideal_Opcode(); 6006 bool sign = (opcode != Op_URShiftVB); 6007 int vlen_enc = Assembler::AVX_512bit; 6008 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6009 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6010 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6011 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6012 __ vshiftw(opcode, 
$tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6013 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6014 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6015 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6016 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6017 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6018 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); 6019 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6020 %} 6021 ins_pipe( pipe_slow ); 6022 %} 6023 6024 // Shorts vector logical right shift produces incorrect Java result 6025 // for negative data because java code convert short value into int with 6026 // sign extension before a shift. But char vectors are fine since chars are 6027 // unsigned values. 6028 // Shorts/Chars vector left shift 6029 instruct vshiftS(vec dst, vec src, vec shift) %{ 6030 predicate(VectorNode::is_vshift_cnt(n->in(2))); 6031 match(Set dst ( LShiftVS src shift)); 6032 match(Set dst ( RShiftVS src shift)); 6033 match(Set dst (URShiftVS src shift)); 6034 effect(TEMP dst, USE src, USE shift); 6035 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %} 6036 ins_encode %{ 6037 int opcode = this->ideal_Opcode(); 6038 if (UseAVX > 0) { 6039 int vlen_enc = vector_length_encoding(this); 6040 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6041 } else { 6042 int vlen = vector_length(this); 6043 if (vlen == 2) { 6044 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6045 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6046 } else if (vlen == 4) { 6047 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6048 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6049 } else { 6050 assert (vlen == 8, "sanity"); 6051 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6052 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6053 } 6054 } 6055 %} 6056 ins_pipe( pipe_slow ); 6057 %} 6058 6059 // Integers vector left shift 6060 instruct vshiftI(vec dst, vec src, vec shift) %{ 6061 predicate(VectorNode::is_vshift_cnt(n->in(2))); 6062 match(Set dst ( LShiftVI src shift)); 6063 match(Set dst ( RShiftVI src shift)); 6064 match(Set dst (URShiftVI src shift)); 6065 effect(TEMP dst, USE src, USE shift); 6066 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6067 ins_encode %{ 6068 int opcode = this->ideal_Opcode(); 6069 if (UseAVX > 0) { 6070 int vlen_enc = vector_length_encoding(this); 6071 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6072 } else { 6073 int vlen = vector_length(this); 6074 if (vlen == 2) { 6075 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6076 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6077 } else { 6078 assert(vlen == 4, "sanity"); 6079 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6080 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6081 } 6082 } 6083 %} 6084 ins_pipe( pipe_slow ); 6085 %} 6086 6087 // Longs vector shift 6088 instruct vshiftL(vec dst, vec src, vec shift) %{ 6089 predicate(VectorNode::is_vshift_cnt(n->in(2))); 6090 match(Set dst ( LShiftVL src shift)); 6091 match(Set dst (URShiftVL src shift)); 6092 effect(TEMP dst, USE src, USE shift); 6093 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6094 ins_encode %{ 6095 int opcode = this->ideal_Opcode(); 6096 if (UseAVX > 0) { 6097 int vlen_enc = vector_length_encoding(this); 6098 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6099 } else { 6100 assert(vector_length(this) == 2, ""); 6101 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6102 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6103 } 6104 %} 6105 ins_pipe( pipe_slow ); 6106 %} 6107 6108 // -------------------ArithmeticRightShift ----------------------------------- 6109 // Long vector arithmetic right shift 6110 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6111 predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX <= 2); 6112 match(Set dst (RShiftVL src shift)); 6113 effect(TEMP dst, TEMP tmp, TEMP scratch); 6114 format %{ "vshiftq $dst,$src,$shift" %} 6115 ins_encode %{ 6116 uint vlen = vector_length(this); 6117 if (vlen == 2) { 6118 assert(UseSSE >= 2, "required"); 6119 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6120 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6121 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6122 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6123 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6124 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6125 } else { 6126 assert(vlen == 4, "sanity"); 6127 assert(UseAVX > 1, "required"); 6128 int vlen_enc = Assembler::AVX_256bit; 6129 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6130 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6131 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6132 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6133 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6134 } 6135 %} 6136 ins_pipe( pipe_slow ); 6137 %} 6138 6139 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6140 predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX > 2); 6141 match(Set dst (RShiftVL src shift)); 6142 format %{ "vshiftq $dst,$src,$shift" %} 6143 ins_encode %{ 6144 int vlen_enc = vector_length_encoding(this); 6145 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6146 %} 6147 ins_pipe( pipe_slow ); 6148 %} 6149 6150 // ------------------- Variable Shift ----------------------------- 6151 // Byte variable shift 6152 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6153 predicate(vector_length(n) <= 8 && 6154 !VectorNode::is_vshift_cnt(n->in(2)) && 6155 !VM_Version::supports_avx512bw()); 6156 match(Set dst ( LShiftVB src shift)); 6157 match(Set dst ( RShiftVB src shift)); 6158 match(Set dst (URShiftVB src shift)); 6159 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6160 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp, $scratch as TEMP" %} 6161 ins_encode %{ 6162 assert(UseAVX >= 2, "required"); 6163 6164 int opcode = this->ideal_Opcode(); 6165 int vlen_enc = Assembler::AVX_128bit; 6166 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6167 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6168 %} 6169 ins_pipe( pipe_slow ); 6170 %} 6171 6172 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6173 predicate(vector_length(n) == 16 && 6174 !VectorNode::is_vshift_cnt(n->in(2)) && 6175 !VM_Version::supports_avx512bw()); 6176 match(Set dst ( LShiftVB src shift)); 6177 match(Set dst ( RShiftVB src shift)); 6178 match(Set dst (URShiftVB src shift)); 6179 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6180 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6181 ins_encode %{ 6182 assert(UseAVX >= 2, "required"); 6183 6184 int opcode = this->ideal_Opcode(); 6185 int vlen_enc = Assembler::AVX_128bit; 6186 // Shift lower half and get word result in dst 6187 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6188 6189 // Shift upper half and get word result in vtmp1 6190 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6191 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6192 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6193 6194 // Merge and down convert the two word results to byte in dst 6195 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6196 %} 6197 ins_pipe( pipe_slow ); 6198 %} 6199 6200 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{ 6201 predicate(vector_length(n) == 32 && 6202 !VectorNode::is_vshift_cnt(n->in(2)) && 6203 !VM_Version::supports_avx512bw()); 6204 match(Set dst ( LShiftVB src shift)); 6205 match(Set dst ( RShiftVB src shift)); 6206 match(Set dst (URShiftVB src shift)); 6207 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch); 6208 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %} 6209 ins_encode %{ 6210 assert(UseAVX >= 2, "required"); 6211 6212 int opcode = this->ideal_Opcode(); 6213 int vlen_enc = Assembler::AVX_128bit; 6214 // Process lower 128 bits and get result in dst 6215 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6216 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6217 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6218 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6219 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6220 6221 // Process higher 128 bits and get result in vtmp3 6222 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6223 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6224 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register); 6225 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6226 __ 
vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 6227 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6228 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 6229 6230 // Merge the two results in dst 6231 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6232 %} 6233 ins_pipe( pipe_slow ); 6234 %} 6235 6236 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6237 predicate(vector_length(n) <= 32 && 6238 !VectorNode::is_vshift_cnt(n->in(2)) && 6239 VM_Version::supports_avx512bw()); 6240 match(Set dst ( LShiftVB src shift)); 6241 match(Set dst ( RShiftVB src shift)); 6242 match(Set dst (URShiftVB src shift)); 6243 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6244 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} 6245 ins_encode %{ 6246 assert(UseAVX > 2, "required"); 6247 6248 int opcode = this->ideal_Opcode(); 6249 int vlen_enc = vector_length_encoding(this); 6250 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6251 %} 6252 ins_pipe( pipe_slow ); 6253 %} 6254 6255 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6256 predicate(vector_length(n) == 64 && 6257 !VectorNode::is_vshift_cnt(n->in(2)) && 6258 VM_Version::supports_avx512bw()); 6259 match(Set dst ( LShiftVB src shift)); 6260 match(Set dst ( RShiftVB src shift)); 6261 match(Set dst (URShiftVB src shift)); 6262 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6263 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6264 ins_encode %{ 6265 assert(UseAVX > 2, "required"); 6266 6267 int opcode = this->ideal_Opcode(); 6268 int vlen_enc = Assembler::AVX_256bit; 6269 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6270 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6271 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6272 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6273 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6274 %} 6275 ins_pipe( pipe_slow ); 6276 %} 6277 6278 // Short variable shift 6279 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6280 predicate(vector_length(n) <= 8 && 6281 !VectorNode::is_vshift_cnt(n->in(2)) && 6282 !VM_Version::supports_avx512bw()); 6283 match(Set dst ( LShiftVS src shift)); 6284 match(Set dst ( RShiftVS src shift)); 6285 match(Set dst (URShiftVS src shift)); 6286 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6287 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6288 ins_encode %{ 6289 assert(UseAVX >= 2, "required"); 6290 6291 int opcode = this->ideal_Opcode(); 6292 bool sign = (opcode != Op_URShiftVS); 6293 int vlen_enc = Assembler::AVX_256bit; 6294 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 6295 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 6296 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 6297 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6298 __ 
vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 6299 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6300 %} 6301 ins_pipe( pipe_slow ); 6302 %} 6303 6304 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6305 predicate(vector_length(n) == 16 && 6306 !VectorNode::is_vshift_cnt(n->in(2)) && 6307 !VM_Version::supports_avx512bw()); 6308 match(Set dst ( LShiftVS src shift)); 6309 match(Set dst ( RShiftVS src shift)); 6310 match(Set dst (URShiftVS src shift)); 6311 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6312 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6313 ins_encode %{ 6314 assert(UseAVX >= 2, "required"); 6315 6316 int opcode = this->ideal_Opcode(); 6317 bool sign = (opcode != Op_URShiftVS); 6318 int vlen_enc = Assembler::AVX_256bit; 6319 // Shift lower half, with result in vtmp2 usign vtmp1 as TEMP 6320 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6321 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6322 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6323 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6324 6325 // Shift upper half, with result in dst usign vtmp1 as TEMP 6326 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 6327 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 6328 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6329 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6330 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6331 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6332 6333 // Merge lower and upper half result into dst 6334 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6335 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6336 %} 6337 ins_pipe( pipe_slow ); 6338 %} 6339 6340 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 6341 predicate(!VectorNode::is_vshift_cnt(n->in(2)) && 6342 VM_Version::supports_avx512bw()); 6343 match(Set dst ( LShiftVS src shift)); 6344 match(Set dst ( RShiftVS src shift)); 6345 match(Set dst (URShiftVS src shift)); 6346 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 6347 ins_encode %{ 6348 assert(UseAVX > 2, "required"); 6349 6350 int opcode = this->ideal_Opcode(); 6351 int vlen_enc = vector_length_encoding(this); 6352 if (!VM_Version::supports_avx512vl()) { 6353 vlen_enc = Assembler::AVX_512bit; 6354 } 6355 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6356 %} 6357 ins_pipe( pipe_slow ); 6358 %} 6359 6360 //Integer variable shift 6361 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 6362 predicate(!VectorNode::is_vshift_cnt(n->in(2))); 6363 match(Set dst ( LShiftVI src shift)); 6364 match(Set dst ( RShiftVI src shift)); 6365 match(Set dst (URShiftVI src shift)); 6366 format %{ "vector_varshift_int $dst,$src,$shift\t!" 
%} 6367 ins_encode %{ 6368 assert(UseAVX >= 2, "required"); 6369 6370 int opcode = this->ideal_Opcode(); 6371 int vlen_enc = vector_length_encoding(this); 6372 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6373 %} 6374 ins_pipe( pipe_slow ); 6375 %} 6376 6377 //Long variable shift 6378 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 6379 predicate(!VectorNode::is_vshift_cnt(n->in(2))); 6380 match(Set dst ( LShiftVL src shift)); 6381 match(Set dst (URShiftVL src shift)); 6382 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 6383 ins_encode %{ 6384 assert(UseAVX >= 2, "required"); 6385 6386 int opcode = this->ideal_Opcode(); 6387 int vlen_enc = vector_length_encoding(this); 6388 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6389 %} 6390 ins_pipe( pipe_slow ); 6391 %} 6392 6393 //Long variable right shift arithmetic 6394 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 6395 predicate(vector_length(n) <= 4 && 6396 !VectorNode::is_vshift_cnt(n->in(2)) && 6397 UseAVX == 2); 6398 match(Set dst (RShiftVL src shift)); 6399 effect(TEMP dst, TEMP vtmp); 6400 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 6401 ins_encode %{ 6402 int opcode = this->ideal_Opcode(); 6403 int vlen_enc = vector_length_encoding(this); 6404 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 6405 $vtmp$$XMMRegister); 6406 %} 6407 ins_pipe( pipe_slow ); 6408 %} 6409 6410 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 6411 predicate(!VectorNode::is_vshift_cnt(n->in(2)) && 6412 UseAVX > 2); 6413 match(Set dst (RShiftVL src shift)); 6414 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 6415 ins_encode %{ 6416 int opcode = this->ideal_Opcode(); 6417 int vlen_enc = vector_length_encoding(this); 6418 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6419 %} 6420 ins_pipe( pipe_slow ); 6421 %} 6422 6423 // --------------------------------- AND -------------------------------------- 6424 6425 instruct vand(vec dst, vec src) %{ 6426 predicate(UseAVX == 0); 6427 match(Set dst (AndV dst src)); 6428 format %{ "pand $dst,$src\t! and vectors" %} 6429 ins_encode %{ 6430 __ pand($dst$$XMMRegister, $src$$XMMRegister); 6431 %} 6432 ins_pipe( pipe_slow ); 6433 %} 6434 6435 instruct vand_reg(vec dst, vec src1, vec src2) %{ 6436 predicate(UseAVX > 0); 6437 match(Set dst (AndV src1 src2)); 6438 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 6439 ins_encode %{ 6440 int vlen_enc = vector_length_encoding(this); 6441 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6442 %} 6443 ins_pipe( pipe_slow ); 6444 %} 6445 6446 instruct vand_mem(vec dst, vec src, memory mem) %{ 6447 predicate(UseAVX > 0); 6448 match(Set dst (AndV src (LoadVector mem))); 6449 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 6450 ins_encode %{ 6451 int vlen_enc = vector_length_encoding(this); 6452 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6453 %} 6454 ins_pipe( pipe_slow ); 6455 %} 6456 6457 // --------------------------------- OR --------------------------------------- 6458 6459 instruct vor(vec dst, vec src) %{ 6460 predicate(UseAVX == 0); 6461 match(Set dst (OrV dst src)); 6462 format %{ "por $dst,$src\t! 
or vectors" %} 6463 ins_encode %{ 6464 __ por($dst$$XMMRegister, $src$$XMMRegister); 6465 %} 6466 ins_pipe( pipe_slow ); 6467 %} 6468 6469 instruct vor_reg(vec dst, vec src1, vec src2) %{ 6470 predicate(UseAVX > 0); 6471 match(Set dst (OrV src1 src2)); 6472 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 6473 ins_encode %{ 6474 int vlen_enc = vector_length_encoding(this); 6475 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6476 %} 6477 ins_pipe( pipe_slow ); 6478 %} 6479 6480 instruct vor_mem(vec dst, vec src, memory mem) %{ 6481 predicate(UseAVX > 0); 6482 match(Set dst (OrV src (LoadVector mem))); 6483 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 6484 ins_encode %{ 6485 int vlen_enc = vector_length_encoding(this); 6486 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6487 %} 6488 ins_pipe( pipe_slow ); 6489 %} 6490 6491 // --------------------------------- XOR -------------------------------------- 6492 6493 instruct vxor(vec dst, vec src) %{ 6494 predicate(UseAVX == 0); 6495 match(Set dst (XorV dst src)); 6496 format %{ "pxor $dst,$src\t! xor vectors" %} 6497 ins_encode %{ 6498 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 6499 %} 6500 ins_pipe( pipe_slow ); 6501 %} 6502 6503 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 6504 predicate(UseAVX > 0); 6505 match(Set dst (XorV src1 src2)); 6506 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 6507 ins_encode %{ 6508 int vlen_enc = vector_length_encoding(this); 6509 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6510 %} 6511 ins_pipe( pipe_slow ); 6512 %} 6513 6514 instruct vxor_mem(vec dst, vec src, memory mem) %{ 6515 predicate(UseAVX > 0); 6516 match(Set dst (XorV src (LoadVector mem))); 6517 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 6518 ins_encode %{ 6519 int vlen_enc = vector_length_encoding(this); 6520 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6521 %} 6522 ins_pipe( pipe_slow ); 6523 %} 6524 6525 // --------------------------------- VectorCast -------------------------------------- 6526 6527 instruct vcastBtoX(vec dst, vec src) %{ 6528 match(Set dst (VectorCastB2X src)); 6529 format %{ "vector_cast_b2x $dst,$src\t!" %} 6530 ins_encode %{ 6531 assert(UseAVX > 0, "required"); 6532 6533 BasicType to_elem_bt = vector_element_basic_type(this); 6534 int vlen_enc = vector_length_encoding(this); 6535 switch (to_elem_bt) { 6536 case T_SHORT: 6537 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6538 break; 6539 case T_INT: 6540 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6541 break; 6542 case T_FLOAT: 6543 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6544 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6545 break; 6546 case T_LONG: 6547 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6548 break; 6549 case T_DOUBLE: 6550 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6551 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6552 break; 6553 6554 default: assert(false, "%s", type2name(to_elem_bt)); 6555 } 6556 %} 6557 ins_pipe( pipe_slow ); 6558 %} 6559 6560 instruct castStoX(vec dst, vec src, rRegP scratch) %{ 6561 predicate(UseAVX <= 2 && 6562 vector_length(n->in(1)) <= 8 && // src 6563 vector_element_basic_type(n) == T_BYTE); 6564 effect(TEMP scratch); 6565 match(Set dst (VectorCastS2X src)); 6566 format %{ "vector_cast_s2x $dst,$src\t! 
using $scratch as TEMP" %} 6567 ins_encode %{ 6568 assert(UseAVX > 0, "required"); 6569 6570 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); 6571 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6572 %} 6573 ins_pipe( pipe_slow ); 6574 %} 6575 6576 instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 6577 predicate(UseAVX <= 2 && 6578 vector_length(n->in(1)) == 16 && // src 6579 vector_element_basic_type(n) == T_BYTE); 6580 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6581 match(Set dst (VectorCastS2X src)); 6582 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp, $scratch as TEMP" %} 6583 ins_encode %{ 6584 assert(UseAVX > 0, "required"); 6585 6586 int vlen_enc = vector_length_encoding(vector_length_in_bytes(this, $src)); 6587 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 6588 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 6589 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6590 %} 6591 ins_pipe( pipe_slow ); 6592 %} 6593 6594 instruct vcastStoX_evex(vec dst, vec src) %{ 6595 predicate(UseAVX > 2 || 6596 (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src 6597 match(Set dst (VectorCastS2X src)); 6598 format %{ "vector_cast_s2x $dst,$src\t!" %} 6599 ins_encode %{ 6600 BasicType to_elem_bt = vector_element_basic_type(this); 6601 int src_vlen_enc = vector_length_encoding(this, $src); 6602 int vlen_enc = vector_length_encoding(this); 6603 switch (to_elem_bt) { 6604 case T_BYTE: 6605 if (!VM_Version::supports_avx512vl()) { 6606 vlen_enc = Assembler::AVX_512bit; 6607 } 6608 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6609 break; 6610 case T_INT: 6611 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6612 break; 6613 case T_FLOAT: 6614 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6615 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6616 break; 6617 case T_LONG: 6618 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6619 break; 6620 case T_DOUBLE: 6621 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6622 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6623 break; 6624 default: 6625 ShouldNotReachHere(); 6626 } 6627 %} 6628 ins_pipe( pipe_slow ); 6629 %} 6630 6631 instruct castItoX(vec dst, vec src, rRegP scratch) %{ 6632 predicate(UseAVX <= 2 && 6633 (vector_length_in_bytes(n->in(1)) <= 16) && 6634 (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src 6635 match(Set dst (VectorCastI2X src)); 6636 format %{ "vector_cast_i2x $dst,$src\t! 
using $scratch as TEMP" %} 6637 effect(TEMP scratch); 6638 ins_encode %{ 6639 assert(UseAVX > 0, "required"); 6640 6641 BasicType to_elem_bt = vector_element_basic_type(this); 6642 int vlen_enc = vector_length_encoding(this, $src); 6643 6644 if (to_elem_bt == T_BYTE) { 6645 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 6646 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6647 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6648 } else { 6649 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 6650 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6651 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6652 } 6653 %} 6654 ins_pipe( pipe_slow ); 6655 %} 6656 6657 instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 6658 predicate(UseAVX <= 2 && 6659 (vector_length_in_bytes(n->in(1)) == 32) && 6660 (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src 6661 match(Set dst (VectorCastI2X src)); 6662 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %} 6663 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6664 ins_encode %{ 6665 assert(UseAVX > 0, "required"); 6666 6667 BasicType to_elem_bt = vector_element_basic_type(this); 6668 int vlen_enc = vector_length_encoding(this, $src); 6669 6670 if (to_elem_bt == T_BYTE) { 6671 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 6672 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 6673 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6674 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6675 } else { 6676 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 6677 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6678 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 6679 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6680 } 6681 %} 6682 ins_pipe( pipe_slow ); 6683 %} 6684 6685 instruct vcastItoX_evex(vec dst, vec src) %{ 6686 predicate(UseAVX > 2 || 6687 (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src 6688 match(Set dst (VectorCastI2X src)); 6689 format %{ "vector_cast_i2x $dst,$src\t!" 
%} 6690 ins_encode %{ 6691 assert(UseAVX > 0, "required"); 6692 6693 BasicType dst_elem_bt = vector_element_basic_type(this); 6694 int src_vlen_enc = vector_length_encoding(this, $src); 6695 int dst_vlen_enc = vector_length_encoding(this); 6696 switch (dst_elem_bt) { 6697 case T_BYTE: 6698 if (!VM_Version::supports_avx512vl()) { 6699 src_vlen_enc = Assembler::AVX_512bit; 6700 } 6701 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6702 break; 6703 case T_SHORT: 6704 if (!VM_Version::supports_avx512vl()) { 6705 src_vlen_enc = Assembler::AVX_512bit; 6706 } 6707 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6708 break; 6709 case T_FLOAT: 6710 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 6711 break; 6712 case T_LONG: 6713 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 6714 break; 6715 case T_DOUBLE: 6716 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 6717 break; 6718 default: 6719 ShouldNotReachHere(); 6720 } 6721 %} 6722 ins_pipe( pipe_slow ); 6723 %} 6724 6725 instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ 6726 predicate((vector_element_basic_type(n) == T_BYTE || vector_element_basic_type(n) == T_SHORT) && 6727 UseAVX <= 2); 6728 match(Set dst (VectorCastL2X src)); 6729 effect(TEMP scratch); 6730 format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %} 6731 ins_encode %{ 6732 assert(UseAVX > 0, "required"); 6733 6734 int vlen = vector_length_in_bytes(this, $src); 6735 BasicType to_elem_bt = vector_element_basic_type(this); 6736 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) 6737 : ExternalAddress(vector_int_to_short_mask()); 6738 if (vlen <= 16) { 6739 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 6740 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 6741 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6742 } else { 6743 assert(vlen <= 32, "required"); 6744 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 6745 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 6746 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 6747 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6748 } 6749 if (to_elem_bt == T_BYTE) { 6750 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6751 } 6752 %} 6753 ins_pipe( pipe_slow ); 6754 %} 6755 6756 instruct vcastLtoX_evex(vec dst, vec src) %{ 6757 predicate(UseAVX > 2 || 6758 (vector_element_basic_type(n) == T_INT || 6759 vector_element_basic_type(n) == T_FLOAT || 6760 vector_element_basic_type(n) == T_DOUBLE)); 6761 match(Set dst (VectorCastL2X src)); 6762 format %{ "vector_cast_l2x $dst,$src\t!" 
%} 6763 ins_encode %{ 6764 BasicType to_elem_bt = vector_element_basic_type(this); 6765 int vlen = vector_length_in_bytes(this, $src); 6766 int vlen_enc = vector_length_encoding(this, $src); 6767 switch (to_elem_bt) { 6768 case T_BYTE: 6769 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 6770 vlen_enc = Assembler::AVX_512bit; 6771 } 6772 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6773 break; 6774 case T_SHORT: 6775 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 6776 vlen_enc = Assembler::AVX_512bit; 6777 } 6778 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6779 break; 6780 case T_INT: 6781 if (vlen == 8) { 6782 if ($dst$$XMMRegister != $src$$XMMRegister) { 6783 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6784 } 6785 } else if (vlen == 16) { 6786 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 6787 } else if (vlen == 32) { 6788 if (UseAVX > 2) { 6789 if (!VM_Version::supports_avx512vl()) { 6790 vlen_enc = Assembler::AVX_512bit; 6791 } 6792 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6793 } else { 6794 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 6795 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 6796 } 6797 } else { // vlen == 64 6798 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6799 } 6800 break; 6801 case T_FLOAT: 6802 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 6803 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6804 break; 6805 case T_DOUBLE: 6806 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 6807 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6808 break; 6809 6810 default: assert(false, "%s", type2name(to_elem_bt)); 6811 } 6812 %} 6813 ins_pipe( pipe_slow ); 6814 %} 6815 6816 instruct vcastFtoD_reg(vec dst, vec src) %{ 6817 predicate(vector_element_basic_type(n) == T_DOUBLE); 6818 match(Set dst (VectorCastF2X src)); 6819 format %{ "vector_cast_f2x $dst,$src\t!" %} 6820 ins_encode %{ 6821 int vlen_enc = vector_length_encoding(this); 6822 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6823 %} 6824 ins_pipe( pipe_slow ); 6825 %} 6826 6827 instruct vcastDtoF_reg(vec dst, vec src) %{ 6828 predicate(vector_element_basic_type(n) == T_FLOAT); 6829 match(Set dst (VectorCastD2X src)); 6830 format %{ "vector_cast_d2x $dst,$src\t!" %} 6831 ins_encode %{ 6832 int vlen_enc = vector_length_encoding(this, $src); 6833 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6834 %} 6835 ins_pipe( pipe_slow ); 6836 %} 6837 6838 // --------------------------------- VectorMaskCmp -------------------------------------- 6839 6840 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 6841 predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 6842 vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 6843 is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 6844 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6845 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 6846 ins_encode %{ 6847 int vlen_enc = vector_length_encoding(this, $src1); 6848 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 6849 if (vector_element_basic_type(this, $src1) == T_FLOAT) { 6850 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6851 } else { 6852 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6853 } 6854 %} 6855 ins_pipe( pipe_slow ); 6856 %} 6857 6858 instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{ 6859 predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 6860 is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 6861 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6862 effect(TEMP scratch); 6863 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6864 ins_encode %{ 6865 int vlen_enc = Assembler::AVX_512bit; 6866 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 6867 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 6868 KRegister mask = k0; // The comparison itself is not being masked. 6869 if (vector_element_basic_type(this, $src1) == T_FLOAT) { 6870 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6871 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 6872 } else { 6873 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6874 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 6875 } 6876 %} 6877 ins_pipe( pipe_slow ); 6878 %} 6879 6880 instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{ 6881 predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 6882 vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 6883 is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 6884 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6885 effect(TEMP scratch); 6886 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6887 ins_encode %{ 6888 int vlen_enc = vector_length_encoding(this, $src1); 6889 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 6890 Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1)); 6891 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register); 6892 %} 6893 ins_pipe( pipe_slow ); 6894 %} 6895 6896 instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{ 6897 predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 6898 is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 6899 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6900 effect(TEMP scratch); 6901 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6902 ins_encode %{ 6903 assert(UseAVX > 2, "required"); 6904 6905 int vlen_enc = Assembler::AVX_512bit; 6906 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 6907 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 6908 KRegister mask = k0; // The comparison itself is not being masked. 
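    // The EVEX compare below leaves its result in the ktmp mask register. The
    // masked evmovdqu* that follows copies lanes from the all-bits-set constant
    // with zeroing (merge == false), so $dst ends up as -1 in every lane where
    // the predicate held and 0 everywhere else.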
6909 bool merge = false; 6910 BasicType src1_elem_bt = vector_element_basic_type(this, $src1); 6911 6912 switch (src1_elem_bt) { 6913 case T_BYTE: { 6914 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6915 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 6916 break; 6917 } 6918 case T_SHORT: { 6919 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6920 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 6921 break; 6922 } 6923 case T_INT: { 6924 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6925 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 6926 break; 6927 } 6928 case T_LONG: { 6929 __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6930 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 6931 break; 6932 } 6933 6934 default: assert(false, "%s", type2name(src1_elem_bt)); 6935 } 6936 %} 6937 ins_pipe( pipe_slow ); 6938 %} 6939 6940 // Extract 6941 6942 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 6943 predicate(vector_length_in_bytes(n->in(1)) <= 16); // src 6944 match(Set dst (ExtractI src idx)); 6945 match(Set dst (ExtractS src idx)); 6946 #ifdef _LP64 6947 match(Set dst (ExtractB src idx)); 6948 #endif 6949 ins_encode %{ 6950 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6951 6952 BasicType elem_bt = vector_element_basic_type(this, $src); 6953 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 6954 %} 6955 ins_pipe( pipe_slow ); 6956 %} 6957 6958 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 6959 predicate(vector_length_in_bytes(n->in(1)) == 32 || // src 6960 vector_length_in_bytes(n->in(1)) == 64); // src 6961 match(Set dst (ExtractI src idx)); 6962 match(Set dst (ExtractS src idx)); 6963 #ifdef _LP64 6964 match(Set dst (ExtractB src idx)); 6965 #endif 6966 effect(TEMP vtmp); 6967 ins_encode %{ 6968 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6969 6970 BasicType elem_bt = vector_element_basic_type(this, $src); 6971 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 6972 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 6973 %} 6974 ins_pipe( pipe_slow ); 6975 %} 6976 6977 #ifdef _LP64 6978 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 6979 predicate(vector_length(n->in(1)) <= 2); // src 6980 match(Set dst (ExtractL src idx)); 6981 ins_encode %{ 6982 assert(UseSSE >= 4, "required"); 6983 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6984 6985 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 6986 %} 6987 ins_pipe( pipe_slow ); 6988 %} 6989 6990 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 6991 predicate(vector_length(n->in(1)) == 4 || // src 6992 vector_length(n->in(1)) == 8); // src 6993 match(Set dst (ExtractL src idx)); 6994 effect(TEMP vtmp); 6995 ins_encode %{ 6996 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 6997 6998 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 6999 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 7000 %} 7001 ins_pipe( pipe_slow ); 
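  // For the 256/512-bit sources handled by this rule, get_lane() first copies
  // the 128-bit lane containing element $idx into $vtmp, and get_elem() then
  // extracts the scalar long from that lane.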
7002 %} 7003 #endif 7004 7005 instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7006 predicate(vector_length(n->in(1)) <= 4); 7007 match(Set dst (ExtractF src idx)); 7008 effect(TEMP dst, TEMP tmp, TEMP vtmp); 7009 ins_encode %{ 7010 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7011 7012 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); 7013 %} 7014 ins_pipe( pipe_slow ); 7015 %} 7016 7017 instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7018 predicate(vector_length(n->in(1)/*src*/) == 8 || 7019 vector_length(n->in(1)/*src*/) == 16); 7020 match(Set dst (ExtractF src idx)); 7021 effect(TEMP tmp, TEMP vtmp); 7022 ins_encode %{ 7023 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7024 7025 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7026 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); 7027 %} 7028 ins_pipe( pipe_slow ); 7029 %} 7030 7031 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 7032 predicate(vector_length(n->in(1)) == 2); // src 7033 match(Set dst (ExtractD src idx)); 7034 ins_encode %{ 7035 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7036 7037 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7038 %} 7039 ins_pipe( pipe_slow ); 7040 %} 7041 7042 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 7043 predicate(vector_length(n->in(1)) == 4 || // src 7044 vector_length(n->in(1)) == 8); // src 7045 match(Set dst (ExtractD src idx)); 7046 effect(TEMP vtmp); 7047 ins_encode %{ 7048 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7049 7050 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7051 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 7052 %} 7053 ins_pipe( pipe_slow ); 7054 %} 7055 7056 // --------------------------------- Vector Blend -------------------------------------- 7057 7058 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 7059 predicate(UseAVX == 0); 7060 match(Set dst (VectorBlend (Binary dst src) mask)); 7061 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %} 7062 effect(TEMP tmp); 7063 ins_encode %{ 7064 assert(UseSSE >= 4, "required"); 7065 7066 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 7067 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 7068 } 7069 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 7070 %} 7071 ins_pipe( pipe_slow ); 7072 %} 7073 7074 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7075 predicate(UseAVX > 0 && 7076 vector_length_in_bytes(n) <= 32 && 7077 is_integral_type(vector_element_basic_type(n))); 7078 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7079 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" 
%} 7080 ins_encode %{ 7081 int vlen_enc = vector_length_encoding(this); 7082 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7083 %} 7084 ins_pipe( pipe_slow ); 7085 %} 7086 7087 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7088 predicate(UseAVX > 0 && 7089 vector_length_in_bytes(n) <= 32 && 7090 !is_integral_type(vector_element_basic_type(n))); 7091 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7092 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7093 ins_encode %{ 7094 int vlen_enc = vector_length_encoding(this); 7095 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7096 %} 7097 ins_pipe( pipe_slow ); 7098 %} 7099 7100 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch) %{ 7101 predicate(vector_length_in_bytes(n) == 64); 7102 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7103 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} 7104 effect(TEMP scratch); 7105 ins_encode %{ 7106 int vlen_enc = Assembler::AVX_512bit; 7107 BasicType elem_bt = vector_element_basic_type(this); 7108 KRegister ktmp = k2; 7109 __ evpcmp(elem_bt, ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); 7110 __ evpblend(elem_bt, $dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7111 %} 7112 ins_pipe( pipe_slow ); 7113 %} 7114 7115 // --------------------------------- ABS -------------------------------------- 7116 // a = |a| 7117 instruct vabsB_reg(vec dst, vec src) %{ 7118 match(Set dst (AbsVB src)); 7119 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7120 ins_encode %{ 7121 uint vlen = vector_length(this); 7122 if (vlen <= 16) { 7123 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7124 } else { 7125 int vlen_enc = vector_length_encoding(this); 7126 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7127 } 7128 %} 7129 ins_pipe( pipe_slow ); 7130 %} 7131 7132 instruct vabsS_reg(vec dst, vec src) %{ 7133 match(Set dst (AbsVS src)); 7134 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7135 ins_encode %{ 7136 uint vlen = vector_length(this); 7137 if (vlen <= 8) { 7138 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7139 } else { 7140 int vlen_enc = vector_length_encoding(this); 7141 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7142 } 7143 %} 7144 ins_pipe( pipe_slow ); 7145 %} 7146 7147 instruct vabsI_reg(vec dst, vec src) %{ 7148 match(Set dst (AbsVI src)); 7149 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 7150 ins_encode %{ 7151 uint vlen = vector_length(this); 7152 if (vlen <= 4) { 7153 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7154 } else { 7155 int vlen_enc = vector_length_encoding(this); 7156 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7157 } 7158 %} 7159 ins_pipe( pipe_slow ); 7160 %} 7161 7162 instruct vabsL_reg(vec dst, vec src) %{ 7163 match(Set dst (AbsVL src)); 7164 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 7165 ins_encode %{ 7166 assert(UseAVX > 2, "required"); 7167 int vlen_enc = vector_length_encoding(this); 7168 if (!VM_Version::supports_avx512vl()) { 7169 vlen_enc = Assembler::AVX_512bit; 7170 } 7171 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7172 %} 7173 ins_pipe( pipe_slow ); 7174 %} 7175 7176 // --------------------------------- ABSNEG -------------------------------------- 7177 
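// Abs and Neg of packed floats/doubles are implemented with bitwise sign-bit
// masks rather than arithmetic. The "[mask]" operand in the formats below is
// that constant, materialized via the $scratch register; the vabsnegf/vabsnegd
// helpers are expected to AND each lane with 0x7fffffff / 0x7fffffffffffffff
// for AbsV* (clear the sign bit) and to XOR with 0x80000000 /
// 0x8000000000000000 for NegV* (flip the sign bit).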
7178 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 7179 predicate(vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 7180 match(Set dst (AbsVF src)); 7181 match(Set dst (NegVF src)); 7182 effect(TEMP scratch); 7183 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 7184 ins_cost(150); 7185 ins_encode %{ 7186 int opcode = this->ideal_Opcode(); 7187 int vlen = vector_length(this); 7188 if (vlen == 2) { 7189 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7190 } else { 7191 assert(vlen == 8 || vlen == 16, "required"); 7192 int vlen_enc = vector_length_encoding(this); 7193 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7194 } 7195 %} 7196 ins_pipe( pipe_slow ); 7197 %} 7198 7199 instruct vabsneg4F(vec dst, rRegI scratch) %{ 7200 predicate(vector_length(n) == 4); 7201 match(Set dst (AbsVF dst)); 7202 match(Set dst (NegVF dst)); 7203 effect(TEMP scratch); 7204 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 7205 ins_cost(150); 7206 ins_encode %{ 7207 int opcode = this->ideal_Opcode(); 7208 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 7209 %} 7210 ins_pipe( pipe_slow ); 7211 %} 7212 7213 instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 7214 match(Set dst (AbsVD src)); 7215 match(Set dst (NegVD src)); 7216 effect(TEMP scratch); 7217 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 7218 ins_encode %{ 7219 int opcode = this->ideal_Opcode(); 7220 uint vlen = vector_length(this); 7221 if (vlen == 2) { 7222 assert(UseSSE >= 2, "required"); 7223 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7224 } else { 7225 int vlen_enc = vector_length_encoding(this); 7226 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7227 } 7228 %} 7229 ins_pipe( pipe_slow ); 7230 %} 7231 7232 //------------------------------------- VectorTest -------------------------------------------- 7233 7234 #ifdef _LP64 7235 instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7236 predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7237 match(Set dst (VectorTest src1 src2 )); 7238 effect(KILL cr); 7239 format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %} 7240 ins_encode %{ 7241 int vlen = vector_length_in_bytes(this, $src1); 7242 int vlen_enc = vector_length_encoding(vlen); 7243 if (vlen <= 32) { 7244 if (UseAVX == 0) { 7245 assert(vlen <= 16, "required"); 7246 __ ptest($src1$$XMMRegister, $src2$$XMMRegister); 7247 } else { 7248 __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7249 } 7250 } else { 7251 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 7252 __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7253 __ kortestql(ktmp, ktmp); 7254 } 7255 __ setb(Assembler::carrySet, $dst$$Register); 7256 __ movzbl($dst$$Register, $dst$$Register); 7257 %} 7258 ins_pipe( pipe_slow ); 7259 %} 7260 7261 instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7262 predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7263 match(Set dst (VectorTest src1 src2 )); 7264 effect(KILL cr); 7265 format %{ "vector_test_any_true $dst,$src1,$src2\t! 
using $cr as TEMP" %} 7266 ins_encode %{ 7267 int vlen = vector_length_in_bytes(this, $src1); 7268 int vlen_enc = vector_length_encoding(vlen); 7269 if (vlen <= 32) { 7270 if (UseAVX == 0) { 7271 assert(vlen <= 16, "required"); 7272 __ ptest($src1$$XMMRegister, $src2$$XMMRegister); 7273 } else { 7274 __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7275 } 7276 } else { 7277 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 7278 __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7279 __ ktestql(ktmp, ktmp); 7280 } 7281 __ setb(Assembler::notZero, $dst$$Register); 7282 __ movzbl($dst$$Register, $dst$$Register); 7283 %} 7284 ins_pipe( pipe_slow ); 7285 %} 7286 #endif 7287 7288 //------------------------------------- LoadMask -------------------------------------------- 7289 7290 instruct loadMask(vec dst, vec src) %{ 7291 match(Set dst (VectorLoadMask src)); 7292 effect(TEMP dst); 7293 format %{ "vector_loadmask_byte $dst,$src\n\t" %} 7294 ins_encode %{ 7295 int vlen_in_bytes = vector_length_in_bytes(this); 7296 BasicType elem_bt = vector_element_basic_type(this); 7297 7298 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt); 7299 %} 7300 ins_pipe( pipe_slow ); 7301 %} 7302 7303 //------------------------------------- StoreMask -------------------------------------------- 7304 7305 instruct storeMask1B(vec dst, vec src, immI_1 size) %{ 7306 predicate(vector_length(n) < 64 || VM_Version::supports_avx512vlbw()); 7307 match(Set dst (VectorStoreMask src size)); 7308 format %{ "vector_store_mask $dst,$src\t!" %} 7309 ins_encode %{ 7310 assert(UseSSE >= 3, "required"); 7311 if (vector_length_in_bytes(this) <= 16) { 7312 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7313 } else { 7314 assert(UseAVX >= 2, "required"); 7315 int src_vlen_enc = vector_length_encoding(this, $src); 7316 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7317 } 7318 %} 7319 ins_pipe( pipe_slow ); 7320 %} 7321 7322 instruct storeMask2B(vec dst, vec src, immI_2 size) %{ 7323 predicate(vector_length(n) <= 8); 7324 match(Set dst (VectorStoreMask src size)); 7325 format %{ "vector_store_mask $dst,$src\n\t" %} 7326 ins_encode %{ 7327 assert(UseSSE >= 3, "required"); 7328 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7329 __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); 7330 %} 7331 ins_pipe( pipe_slow ); 7332 %} 7333 7334 instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{ 7335 predicate(vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 7336 match(Set dst (VectorStoreMask src size)); 7337 effect(TEMP dst); 7338 format %{ "vector_store_mask $dst,$src\t!" %} 7339 ins_encode %{ 7340 int vlen_enc = Assembler::AVX_128bit; 7341 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 7342 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister,vlen_enc); 7343 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7344 %} 7345 ins_pipe( pipe_slow ); 7346 %} 7347 7348 instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{ 7349 predicate(VM_Version::supports_avx512bw()); 7350 match(Set dst (VectorStoreMask src size)); 7351 format %{ "vector_store_mask $dst,$src\t!" 

instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
  predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set dst (VectorTest src1 src2 ));
  effect(KILL cr);
  format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
  ins_encode %{
    int vlen = vector_length_in_bytes(this, $src1);
    int vlen_enc = vector_length_encoding(vlen);
    if (vlen <= 32) {
      if (UseAVX == 0) {
        assert(vlen <= 16, "required");
        __ ptest($src1$$XMMRegister, $src2$$XMMRegister);
      } else {
        __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
      }
    } else {
      KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
      __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
      __ ktestql(ktmp, ktmp);
    }
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
#endif

//------------------------------------- LoadMask --------------------------------------------

instruct loadMask(vec dst, vec src) %{
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst,$src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = vector_length_in_bytes(this);
    BasicType elem_bt = vector_element_basic_type(this);

    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- StoreMask --------------------------------------------
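
// VectorLoadMask (above) widens a vector of 0/1 bytes into a full lane-width
// 0/-1 mask for the result element type. The VectorStoreMask patterns below
// perform the inverse: the pack instructions narrow lanes of the given element
// size back down to bytes and pabsb/vpabsb turns -1 lanes into 1, producing a
// 0/1 byte vector. The applicable pattern is selected on vector length and on
// the available SSE/AVX/AVX-512 features.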

instruct storeMask1B(vec dst, vec src, immI_1 size) %{
  predicate(vector_length(n) < 64 || VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    if (vector_length_in_bytes(this) <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX >= 2, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask2B(vec dst, vec src, immI_2 size) %{
  predicate(vector_length(n) <= 8);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\n\t" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{
  predicate(vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorStoreMask src size));
  effect(TEMP dst);
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
    __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{
  predicate(VM_Version::supports_avx512bw());
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask4B(vec dst, vec src, immI_4 size) %{
  predicate(vector_length(n) <= 4 && UseAVX <= 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    __ packssdw($dst$$XMMRegister, $dst$$XMMRegister);
    __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{
  predicate(vector_length(n) == 8 && UseAVX <= 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  effect(TEMP dst);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
    __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B(vec dst, vec src, immI_8 size) %{
  predicate(vector_length(n) == 2 && UseAVX <= 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ packssdw($dst$$XMMRegister, $dst$$XMMRegister);
    __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
    __ pabsb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, legVec vtmp) %{
  predicate(vector_length(n) == 4 && UseAVX <= 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
  predicate(vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorLoadConst src));
  effect(TEMP scratch);
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Rearrange ----------------------------------
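
// A VectorRearrange is fed by a VectorLoadShuffle, which converts the index
// vector (one byte per element, often built from the iota constant loaded
// above) into whatever form the shuffle instruction for the element size
// consumes. For byte elements the indices are already in pshufb/vpermb form,
// so that load-shuffle is a no-op; for wider elements the indices are
// rescaled as described in the per-type comments below.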

// LoadShuffle/Rearrange for Byte

instruct loadShuffleB(vec dst) %{
  predicate(vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorLoadShuffle dst));
  format %{ "vector_load_shuffle $dst, $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(vec dst, vec src, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    __ vpshufb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short
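
// The pre-AVX512BW path below has only a byte shuffle (pshufb) available, so
// each 16-bit lane index i is rewritten into the byte pair (2*i, 2*i+1): the
// indices are doubled, duplicated into both bytes of every word, and a
// {0,1, 0,1, ...} constant is added. For example, the short shuffle
// [2, 0, 3, 1] becomes the byte-level shuffle [4,5, 0,1, 6,7, 2,3].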

instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms

    // Multiply each shuffle by two to get byte index
    __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
    __ psllw($vtmp$$XMMRegister, 1);

    // Duplicate to create 2 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

    // Add one to get alternate byte index
    __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleS_evex(vec dst, vec src) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float
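
// Same idea as for shorts: with UseAVX < 2 only a byte shuffle is available,
// so each 32-bit lane index i is expanded into the four byte indices
// (4*i, 4*i+1, 4*i+2, 4*i+3) by scaling, byte duplication and adding a
// {0,1,2,3, ...} constant; e.g. the int shuffle [1, 0] becomes the byte-level
// shuffle [4,5,6,7, 0,1,2,3]. With AVX2 and above, vpermd consumes the
// zero-extended dword indices directly.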

instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            vector_length(n) == 4 && UseAVX < 2);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            vector_length(n) == 4 && UseAVX < 2);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleI_avx(vec dst, vec src) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX >= 2);
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX >= 2);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double
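
// AVX2 has no variable cross-lane qword permute, so the pre-AVX512VL path
// below converts each 64-bit lane index i into the dword pair (2*i, 2*i+1)
// and uses vpermd instead; e.g. the long shuffle [1, 0] becomes the dword
// shuffle [2,3, 0,1]. With AVX-512 (or AVX512VL for shorter vectors) the byte
// indices are simply zero-extended to qwords and fed to vpermq.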

instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleL_evex(vec dst, vec src) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------
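
// MulAddVS2VI maps onto pmaddwd: each pair of adjacent 16-bit elements is
// multiplied and the two products are summed into one 32-bit result lane.
// When AVX512_VNNI is available, the separate multiply-add and vector add are
// fused into a single evpdpwssd (see the Multiply Add Add pattern below); its
// low ins_cost makes the matcher prefer the fused form.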

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcountI(vec dst, vec src) %{
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %}
  ins_encode %{
    assert(UsePopCountInstruction, "not enabled");

    int vlen_enc = vector_length_encoding(this);
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
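
// The 8-bit $func immediate of vpternlogd above is a truth table over the
// three operands: bit k of the immediate is the result for the input
// combination whose index is k (dst, src2, src3 forming the high, middle and
// low bit). For example 0x96 encodes dst ^ src2 ^ src3 and 0xE8 encodes the
// majority function. MacroLogicV nodes are produced when C2 collapses chains
// of two-input logic operations into a single ternary operation.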