//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//     XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//     XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
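// Purely illustrative sketch (a hypothetical declaration, not used by this
// file): the general purpose registers live in the platform specific ad
// files and use the other save types listed above; a callee-saved integer
// register would be declared along these lines:
//
//   reg_def SOME_SOE_REG(NS, SOE, Op_RegI, 5, some_reg->as_VMReg());
//
// All of the XMM slices defined below are (SOC, SOC): the allocator may use
// them freely inside a method but must treat them as killed across every
// call site.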
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM31 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 632 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 633 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 634 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 635 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 636 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 637 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 638 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 639 #ifdef _LP64 640 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 641 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 642 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 643 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 644 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 645 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 646 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 647 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 648 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 649 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, 
XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 650 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 651 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 652 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 653 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 654 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 655 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 656 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 657 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 658 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 659 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 660 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 661 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 662 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 663 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 664 #endif 665 ); 666 667 // flags allocation class should be last. 
668 alloc_class chunk2(RFLAGS); 669 670 // Singleton class for condition codes 671 reg_class int_flags(RFLAGS); 672 673 // Class for pre evex float registers 674 reg_class float_reg_legacy(XMM0, 675 XMM1, 676 XMM2, 677 XMM3, 678 XMM4, 679 XMM5, 680 XMM6, 681 XMM7 682 #ifdef _LP64 683 ,XMM8, 684 XMM9, 685 XMM10, 686 XMM11, 687 XMM12, 688 XMM13, 689 XMM14, 690 XMM15 691 #endif 692 ); 693 694 // Class for evex float registers 695 reg_class float_reg_evex(XMM0, 696 XMM1, 697 XMM2, 698 XMM3, 699 XMM4, 700 XMM5, 701 XMM6, 702 XMM7 703 #ifdef _LP64 704 ,XMM8, 705 XMM9, 706 XMM10, 707 XMM11, 708 XMM12, 709 XMM13, 710 XMM14, 711 XMM15, 712 XMM16, 713 XMM17, 714 XMM18, 715 XMM19, 716 XMM20, 717 XMM21, 718 XMM22, 719 XMM23, 720 XMM24, 721 XMM25, 722 XMM26, 723 XMM27, 724 XMM28, 725 XMM29, 726 XMM30, 727 XMM31 728 #endif 729 ); 730 731 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 732 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 733 734 // Class for pre evex double registers 735 reg_class double_reg_legacy(XMM0, XMM0b, 736 XMM1, XMM1b, 737 XMM2, XMM2b, 738 XMM3, XMM3b, 739 XMM4, XMM4b, 740 XMM5, XMM5b, 741 XMM6, XMM6b, 742 XMM7, XMM7b 743 #ifdef _LP64 744 ,XMM8, XMM8b, 745 XMM9, XMM9b, 746 XMM10, XMM10b, 747 XMM11, XMM11b, 748 XMM12, XMM12b, 749 XMM13, XMM13b, 750 XMM14, XMM14b, 751 XMM15, XMM15b 752 #endif 753 ); 754 755 // Class for evex double registers 756 reg_class double_reg_evex(XMM0, XMM0b, 757 XMM1, XMM1b, 758 XMM2, XMM2b, 759 XMM3, XMM3b, 760 XMM4, XMM4b, 761 XMM5, XMM5b, 762 XMM6, XMM6b, 763 XMM7, XMM7b 764 #ifdef _LP64 765 ,XMM8, XMM8b, 766 XMM9, XMM9b, 767 XMM10, XMM10b, 768 XMM11, XMM11b, 769 XMM12, XMM12b, 770 XMM13, XMM13b, 771 XMM14, XMM14b, 772 XMM15, XMM15b, 773 XMM16, XMM16b, 774 XMM17, XMM17b, 775 XMM18, XMM18b, 776 XMM19, XMM19b, 777 XMM20, XMM20b, 778 XMM21, XMM21b, 779 XMM22, XMM22b, 780 XMM23, XMM23b, 781 XMM24, XMM24b, 782 XMM25, XMM25b, 783 XMM26, XMM26b, 784 XMM27, XMM27b, 785 XMM28, XMM28b, 786 XMM29, XMM29b, 787 XMM30, XMM30b, 788 XMM31, XMM31b 789 #endif 790 ); 791 792 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 793 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 794 795 // Class for pre evex 32bit vector registers 796 reg_class vectors_reg_legacy(XMM0, 797 XMM1, 798 XMM2, 799 XMM3, 800 XMM4, 801 XMM5, 802 XMM6, 803 XMM7 804 #ifdef _LP64 805 ,XMM8, 806 XMM9, 807 XMM10, 808 XMM11, 809 XMM12, 810 XMM13, 811 XMM14, 812 XMM15 813 #endif 814 ); 815 816 // Class for evex 32bit vector registers 817 reg_class vectors_reg_evex(XMM0, 818 XMM1, 819 XMM2, 820 XMM3, 821 XMM4, 822 XMM5, 823 XMM6, 824 XMM7 825 #ifdef _LP64 826 ,XMM8, 827 XMM9, 828 XMM10, 829 XMM11, 830 XMM12, 831 XMM13, 832 XMM14, 833 XMM15, 834 XMM16, 835 XMM17, 836 XMM18, 837 XMM19, 838 XMM20, 839 XMM21, 840 XMM22, 841 XMM23, 842 XMM24, 843 XMM25, 844 XMM26, 845 XMM27, 846 XMM28, 847 XMM29, 848 XMM30, 849 XMM31 850 #endif 851 ); 852 853 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 854 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 855 856 // Class for all 64bit vector registers 857 reg_class vectord_reg_legacy(XMM0, XMM0b, 858 XMM1, XMM1b, 859 XMM2, XMM2b, 860 XMM3, XMM3b, 861 XMM4, XMM4b, 862 
XMM5, XMM5b, 863 XMM6, XMM6b, 864 XMM7, XMM7b 865 #ifdef _LP64 866 ,XMM8, XMM8b, 867 XMM9, XMM9b, 868 XMM10, XMM10b, 869 XMM11, XMM11b, 870 XMM12, XMM12b, 871 XMM13, XMM13b, 872 XMM14, XMM14b, 873 XMM15, XMM15b 874 #endif 875 ); 876 877 // Class for all 64bit vector registers 878 reg_class vectord_reg_evex(XMM0, XMM0b, 879 XMM1, XMM1b, 880 XMM2, XMM2b, 881 XMM3, XMM3b, 882 XMM4, XMM4b, 883 XMM5, XMM5b, 884 XMM6, XMM6b, 885 XMM7, XMM7b 886 #ifdef _LP64 887 ,XMM8, XMM8b, 888 XMM9, XMM9b, 889 XMM10, XMM10b, 890 XMM11, XMM11b, 891 XMM12, XMM12b, 892 XMM13, XMM13b, 893 XMM14, XMM14b, 894 XMM15, XMM15b, 895 XMM16, XMM16b, 896 XMM17, XMM17b, 897 XMM18, XMM18b, 898 XMM19, XMM19b, 899 XMM20, XMM20b, 900 XMM21, XMM21b, 901 XMM22, XMM22b, 902 XMM23, XMM23b, 903 XMM24, XMM24b, 904 XMM25, XMM25b, 905 XMM26, XMM26b, 906 XMM27, XMM27b, 907 XMM28, XMM28b, 908 XMM29, XMM29b, 909 XMM30, XMM30b, 910 XMM31, XMM31b 911 #endif 912 ); 913 914 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 915 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 916 917 // Class for all 128bit vector registers 918 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 919 XMM1, XMM1b, XMM1c, XMM1d, 920 XMM2, XMM2b, XMM2c, XMM2d, 921 XMM3, XMM3b, XMM3c, XMM3d, 922 XMM4, XMM4b, XMM4c, XMM4d, 923 XMM5, XMM5b, XMM5c, XMM5d, 924 XMM6, XMM6b, XMM6c, XMM6d, 925 XMM7, XMM7b, XMM7c, XMM7d 926 #ifdef _LP64 927 ,XMM8, XMM8b, XMM8c, XMM8d, 928 XMM9, XMM9b, XMM9c, XMM9d, 929 XMM10, XMM10b, XMM10c, XMM10d, 930 XMM11, XMM11b, XMM11c, XMM11d, 931 XMM12, XMM12b, XMM12c, XMM12d, 932 XMM13, XMM13b, XMM13c, XMM13d, 933 XMM14, XMM14b, XMM14c, XMM14d, 934 XMM15, XMM15b, XMM15c, XMM15d 935 #endif 936 ); 937 938 // Class for all 128bit vector registers 939 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 940 XMM1, XMM1b, XMM1c, XMM1d, 941 XMM2, XMM2b, XMM2c, XMM2d, 942 XMM3, XMM3b, XMM3c, XMM3d, 943 XMM4, XMM4b, XMM4c, XMM4d, 944 XMM5, XMM5b, XMM5c, XMM5d, 945 XMM6, XMM6b, XMM6c, XMM6d, 946 XMM7, XMM7b, XMM7c, XMM7d 947 #ifdef _LP64 948 ,XMM8, XMM8b, XMM8c, XMM8d, 949 XMM9, XMM9b, XMM9c, XMM9d, 950 XMM10, XMM10b, XMM10c, XMM10d, 951 XMM11, XMM11b, XMM11c, XMM11d, 952 XMM12, XMM12b, XMM12c, XMM12d, 953 XMM13, XMM13b, XMM13c, XMM13d, 954 XMM14, XMM14b, XMM14c, XMM14d, 955 XMM15, XMM15b, XMM15c, XMM15d, 956 XMM16, XMM16b, XMM16c, XMM16d, 957 XMM17, XMM17b, XMM17c, XMM17d, 958 XMM18, XMM18b, XMM18c, XMM18d, 959 XMM19, XMM19b, XMM19c, XMM19d, 960 XMM20, XMM20b, XMM20c, XMM20d, 961 XMM21, XMM21b, XMM21c, XMM21d, 962 XMM22, XMM22b, XMM22c, XMM22d, 963 XMM23, XMM23b, XMM23c, XMM23d, 964 XMM24, XMM24b, XMM24c, XMM24d, 965 XMM25, XMM25b, XMM25c, XMM25d, 966 XMM26, XMM26b, XMM26c, XMM26d, 967 XMM27, XMM27b, XMM27c, XMM27d, 968 XMM28, XMM28b, XMM28c, XMM28d, 969 XMM29, XMM29b, XMM29c, XMM29d, 970 XMM30, XMM30b, XMM30c, XMM30d, 971 XMM31, XMM31b, XMM31c, XMM31d 972 #endif 973 ); 974 975 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 976 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 977 978 // Class for all 256bit vector registers 979 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 980 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 981 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 982 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 983 XMM4, XMM4b, 
XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 984 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 985 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 986 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 987 #ifdef _LP64 988 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 989 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 990 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 991 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 992 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 993 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 994 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 995 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 996 #endif 997 ); 998 999 // Class for all 256bit vector registers 1000 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1001 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1002 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1003 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1004 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1005 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1006 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1007 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1008 #ifdef _LP64 1009 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1010 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1011 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1012 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1013 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1014 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1015 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1016 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1017 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1018 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1019 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1020 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1021 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1022 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1023 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1024 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1025 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1026 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1027 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1028 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1029 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1030 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1031 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1032 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1033 #endif 1034 ); 1035 1036 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1037 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1038 1039 // Class for all 512bit vector registers 1040 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1041 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1042 XMM2, XMM2b, 
XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1043 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1044 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1045 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1046 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1047 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1048 #ifdef _LP64 1049 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1057 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, 
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 %} 1101 1102 1103 //----------SOURCE BLOCK------------------------------------------------------- 1104 // This is a block of C++ code which provides values, functions, and 1105 // definitions necessary in the rest of the architecture description 1106 1107 source_hpp %{ 1108 // Header information of the source block. 1109 // Method declarations/definitions which are used outside 1110 // the ad-scope can conveniently be defined here. 1111 // 1112 // To keep related declarations/definitions/uses close together, 1113 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---< Used for optimization in Compile::shorten_branches >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum = Node::_last_flag << 1,
    _last_flag             = Flag_intel_jcc_erratum
  };
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}

int MachNode::pd_alignment_required() const {
  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
    // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
    return IntelJccErratum::largest_jcc_size() + 1;
  } else {
    return 1;
  }
}

int MachNode::compute_padding(int current_offset) const {
  if (flags() & Node::PD::Flag_intel_jcc_erratum) {
    Compile* C = Compile::current();
    PhaseOutput* output = C->output();
    Block* block = output->block();
    int index = output->index();
    return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
  } else {
    return 0;
  }
}

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
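// (Readability note: the "__ " prefix used in the emitter code below is the
// usual HotSpot shorthand for calling into the local C2_MacroAssembler
// instance "_masm"; the macro itself is assumed to be defined elsewhere in the
// generated code and is mentioned here only for orientation.)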
1217 C2_MacroAssembler _masm(&cbuf); 1218 address base = __ start_a_stub(size_exception_handler()); 1219 if (base == NULL) { 1220 ciEnv::current()->record_failure("CodeCache is full"); 1221 return 0; // CodeBuffer::expand failed 1222 } 1223 int offset = __ offset(); 1224 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1225 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1226 __ end_a_stub(); 1227 return offset; 1228 } 1229 1230 // Emit deopt handler code. 1231 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1232 1233 // Note that the code buffer's insts_mark is always relative to insts. 1234 // That's why we must use the macroassembler to generate a handler. 1235 C2_MacroAssembler _masm(&cbuf); 1236 address base = __ start_a_stub(size_deopt_handler()); 1237 if (base == NULL) { 1238 ciEnv::current()->record_failure("CodeCache is full"); 1239 return 0; // CodeBuffer::expand failed 1240 } 1241 int offset = __ offset(); 1242 1243 #ifdef _LP64 1244 address the_pc = (address) __ pc(); 1245 Label next; 1246 // push a "the_pc" on the stack without destroying any registers 1247 // as they all may be live. 1248 1249 // push address of "next" 1250 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1251 __ bind(next); 1252 // adjust it so it matches "the_pc" 1253 __ subptr(Address(rsp, 0), __ offset() - offset); 1254 #else 1255 InternalAddress here(__ pc()); 1256 __ pushptr(here.addr()); 1257 #endif 1258 1259 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1260 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1261 __ end_a_stub(); 1262 return offset; 1263 } 1264 1265 1266 //============================================================================= 1267 1268 // Float masks come from different places depending on platform. 
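// They are bit patterns that the scalar abs/neg instructs further down apply
// with andps/andpd (sign mask: clear the sign bit) and xorps/xorpd (sign flip:
// toggle the sign bit).
//
// Illustrative sketch only (plain C++, not part of the generated code),
// assuming IEEE-754 single precision, showing the effect on the raw bits:
//
//   uint32_t bits;
//   memcpy(&bits, &f, sizeof(bits));          // reinterpret float f as its bit pattern
//   uint32_t abs_bits = bits & 0x7fffffff;    // what andps with float_signmask() computes
//   uint32_t neg_bits = bits ^ 0x80000000;    // what xorps with float_signflip() computes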
1269 #ifdef _LP64 1270 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1271 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1272 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1273 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1274 #else 1275 static address float_signmask() { return (address)float_signmask_pool; } 1276 static address float_signflip() { return (address)float_signflip_pool; } 1277 static address double_signmask() { return (address)double_signmask_pool; } 1278 static address double_signflip() { return (address)double_signflip_pool; } 1279 #endif 1280 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1281 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1282 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1283 1284 //============================================================================= 1285 const bool Matcher::match_rule_supported(int opcode) { 1286 if (!has_match_rule(opcode)) { 1287 return false; // no match rule present 1288 } 1289 switch (opcode) { 1290 case Op_AbsVL: 1291 if (UseAVX < 3) { 1292 return false; 1293 } 1294 break; 1295 case Op_PopCountI: 1296 case Op_PopCountL: 1297 if (!UsePopCountInstruction) { 1298 return false; 1299 } 1300 break; 1301 case Op_PopCountVI: 1302 if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) { 1303 return false; 1304 } 1305 break; 1306 case Op_MulVI: 1307 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1308 return false; 1309 } 1310 break; 1311 case Op_MulVL: 1312 case Op_MulReductionVL: 1313 if (VM_Version::supports_avx512dq() == false) { 1314 return false; 1315 } 1316 break; 1317 case Op_AbsVB: 1318 case Op_AbsVS: 1319 case Op_AbsVI: 1320 case Op_AddReductionVI: 1321 case Op_AndReductionV: 1322 case Op_OrReductionV: 1323 case Op_XorReductionV: 1324 if (UseSSE < 3) { // requires at least SSSE3 1325 return false; 1326 } 1327 break; 1328 case Op_MulReductionVI: 1329 if (UseSSE < 4) { // requires at least SSE4 1330 return false; 1331 } 1332 break; 1333 case Op_SqrtVD: 1334 case Op_SqrtVF: 1335 if (UseAVX < 1) { // enabled for AVX only 1336 return false; 1337 } 1338 break; 1339 case Op_CompareAndSwapL: 1340 #ifdef _LP64 1341 case Op_CompareAndSwapP: 1342 #endif 1343 if (!VM_Version::supports_cx8()) { 1344 return false; 1345 } 1346 break; 1347 case Op_CMoveVF: 1348 case Op_CMoveVD: 1349 if (UseAVX < 1 || UseAVX > 2) { 1350 return false; 1351 } 1352 break; 1353 case Op_StrIndexOf: 1354 if (!UseSSE42Intrinsics) { 1355 return false; 1356 } 1357 break; 1358 case Op_StrIndexOfChar: 1359 if (!UseSSE42Intrinsics) { 1360 return false; 1361 } 1362 break; 1363 case Op_OnSpinWait: 1364 if (VM_Version::supports_on_spin_wait() == false) { 1365 return false; 1366 } 1367 break; 1368 case Op_MulVB: 1369 case Op_LShiftVB: 1370 case Op_RShiftVB: 1371 case Op_URShiftVB: 1372 if (UseSSE < 4) { 1373 return false; 1374 } 1375 break; 1376 #ifdef _LP64 1377 case Op_MaxD: 1378 case Op_MaxF: 1379 case Op_MinD: 1380 case Op_MinF: 1381 if (UseAVX < 1) { // enabled for AVX only 1382 return false; 1383 } 1384 break; 1385 #endif 1386 case Op_CacheWB: 1387 case Op_CacheWBPreSync: 1388 case Op_CacheWBPostSync: 1389 if (!VM_Version::supports_data_cache_line_flush()) { 1390 return false; 1391 } 1392 break; 1393 case Op_RoundDoubleMode: 1394 if (UseSSE < 
4) { 1395 return false; 1396 } 1397 break; 1398 case Op_RoundDoubleModeV: 1399 if (VM_Version::supports_avx() == false) { 1400 return false; // 128bit vroundpd is not available 1401 } 1402 break; 1403 case Op_MacroLogicV: 1404 if (UseAVX < 3 || !UseVectorMacroLogic) { 1405 return false; 1406 } 1407 break; 1408 #ifndef _LP64 1409 case Op_AddReductionVF: 1410 case Op_AddReductionVD: 1411 case Op_MulReductionVF: 1412 case Op_MulReductionVD: 1413 if (UseSSE < 1) { // requires at least SSE 1414 return false; 1415 } 1416 break; 1417 case Op_MulAddVS2VI: 1418 case Op_RShiftVL: 1419 case Op_AbsVD: 1420 case Op_NegVD: 1421 if (UseSSE < 2) { 1422 return false; 1423 } 1424 break; 1425 #endif // !LP64 1426 } 1427 return true; // Match rules are supported by default. 1428 } 1429 1430 //------------------------------------------------------------------------ 1431 1432 // Identify extra cases that we might want to provide match rules for vector nodes and 1433 // other intrinsics guarded with vector length (vlen) and element type (bt). 1434 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1435 if (!match_rule_supported(opcode)) { 1436 return false; 1437 } 1438 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1439 // * SSE2 supports 128bit vectors for all types; 1440 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1441 // * AVX2 supports 256bit vectors for all types; 1442 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1443 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1444 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1445 // And MaxVectorSize is taken into account as well. 1446 if (!vector_size_supported(bt, vlen)) { 1447 return false; 1448 } 1449 // Special cases which require vector length follow: 1450 // * implementation limitations 1451 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1452 // * 128bit vroundpd instruction is present only in AVX1 1453 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1454 switch (opcode) { 1455 case Op_AbsVF: 1456 case Op_NegVF: 1457 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1458 return false; // 512bit vandps and vxorps are not available 1459 } 1460 break; 1461 case Op_AbsVD: 1462 case Op_NegVD: 1463 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1464 return false; // 512bit vandpd and vxorpd are not available 1465 } 1466 break; 1467 case Op_CMoveVF: 1468 if (vlen != 8) { 1469 return false; // implementation limitation (only vcmov8F_reg is present) 1470 } 1471 break; 1472 case Op_RotateRightV: 1473 case Op_RotateLeftV: 1474 case Op_MacroLogicV: 1475 if (!VM_Version::supports_evex() || 1476 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1477 return false; 1478 } 1479 break; 1480 case Op_CMoveVD: 1481 if (vlen != 4) { 1482 return false; // implementation limitation (only vcmov4D_reg is present) 1483 } 1484 break; 1485 } 1486 return true; // Per default match rules are supported. 1487 } 1488 1489 // x86 supports generic vector operands: vec and legVec. 
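// Match rules can be written against the size-agnostic vec/legVec operands
// (defined in the OPERANDS section below); after matching, a post-selection
// pass calls pd_specialize_generic_vector_operand() to rewrite each one into
// the concrete operand for the node's ideal register:
//
//   Op_VecS -> vecS / legVecS   ( 4 bytes)
//   Op_VecD -> vecD / legVecD   ( 8 bytes)
//   Op_VecX -> vecX / legVecX   (16 bytes, xmm)
//   Op_VecY -> vecY / legVecY   (32 bytes, ymm)
//   Op_VecZ -> vecZ / legVecZ   (64 bytes, zmm)
//
// (Summary derived from the switch below; the KNL special case additionally
// pins 512-bit TEMP operands to legVecZ so they stay within zmm0-zmm15.)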
1490 const bool Matcher::supports_generic_vector_operands = true; 1491 1492 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 1493 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 1494 bool legacy = (generic_opnd->opcode() == LEGVEC); 1495 if (!VM_Version::supports_avx512vlbwdq() && // KNL 1496 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 1497 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 1498 return new legVecZOper(); 1499 } 1500 if (legacy) { 1501 switch (ideal_reg) { 1502 case Op_VecS: return new legVecSOper(); 1503 case Op_VecD: return new legVecDOper(); 1504 case Op_VecX: return new legVecXOper(); 1505 case Op_VecY: return new legVecYOper(); 1506 case Op_VecZ: return new legVecZOper(); 1507 } 1508 } else { 1509 switch (ideal_reg) { 1510 case Op_VecS: return new vecSOper(); 1511 case Op_VecD: return new vecDOper(); 1512 case Op_VecX: return new vecXOper(); 1513 case Op_VecY: return new vecYOper(); 1514 case Op_VecZ: return new vecZOper(); 1515 } 1516 } 1517 ShouldNotReachHere(); 1518 return NULL; 1519 } 1520 1521 bool Matcher::is_generic_reg2reg_move(MachNode* m) { 1522 switch (m->rule()) { 1523 case MoveVec2Leg_rule: 1524 case MoveLeg2Vec_rule: 1525 return true; 1526 default: 1527 return false; 1528 } 1529 } 1530 1531 bool Matcher::is_generic_vector(MachOper* opnd) { 1532 switch (opnd->opcode()) { 1533 case VEC: 1534 case LEGVEC: 1535 return true; 1536 default: 1537 return false; 1538 } 1539 } 1540 1541 //------------------------------------------------------------------------ 1542 1543 const bool Matcher::has_predicated_vectors(void) { 1544 bool ret_value = false; 1545 if (UseAVX > 2) { 1546 ret_value = VM_Version::supports_avx512vl(); 1547 } 1548 1549 return ret_value; 1550 } 1551 1552 const int Matcher::float_pressure(int default_pressure_threshold) { 1553 int float_pressure_threshold = default_pressure_threshold; 1554 #ifdef _LP64 1555 if (UseAVX > 2) { 1556 // Increase pressure threshold on machines with AVX3 which have 1557 // 2x more XMM registers. 1558 float_pressure_threshold = default_pressure_threshold * 2; 1559 } 1560 #endif 1561 return float_pressure_threshold; 1562 } 1563 1564 // Max vector size in bytes. 0 if not supported. 1565 const int Matcher::vector_width_in_bytes(BasicType bt) { 1566 assert(is_java_primitive(bt), "only primitive type vectors"); 1567 if (UseSSE < 2) return 0; 1568 // SSE2 supports 128bit vectors for all types. 1569 // AVX2 supports 256bit vectors for all types. 1570 // AVX2/EVEX supports 512bit vectors for all types. 1571 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1572 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1573 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1574 size = (UseAVX > 2) ? 64 : 32; 1575 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 1576 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 1577 // Use flag to limit vector size. 1578 size = MIN2(size,(int)MaxVectorSize); 1579 // Minimum 2 values in vector (or 4 for bytes). 
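// Worked example (illustrative): with UseAVX == 2 and MaxVectorSize == 32 the
// computation above gives size = (1 << 2) * 8 = 32 bytes for every element
// type, so max_vector_size(T_INT) below is 32 / 4 = 8 elements; with plain
// SSE2 (UseAVX == 0, MaxVectorSize at least 16) the same query gives
// 16 / 4 = 4 elements. The switch that follows only enforces the minimum
// width, e.g. T_LONG and T_DOUBLE need at least 16 bytes so that a vector
// holds at least 2 elements.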
1580 switch (bt) { 1581 case T_DOUBLE: 1582 case T_LONG: 1583 if (size < 16) return 0; 1584 break; 1585 case T_FLOAT: 1586 case T_INT: 1587 if (size < 8) return 0; 1588 break; 1589 case T_BOOLEAN: 1590 if (size < 4) return 0; 1591 break; 1592 case T_CHAR: 1593 if (size < 4) return 0; 1594 break; 1595 case T_BYTE: 1596 if (size < 4) return 0; 1597 break; 1598 case T_SHORT: 1599 if (size < 4) return 0; 1600 break; 1601 default: 1602 ShouldNotReachHere(); 1603 } 1604 return size; 1605 } 1606 1607 // Limits on vector size (number of elements) loaded into vector. 1608 const int Matcher::max_vector_size(const BasicType bt) { 1609 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1610 } 1611 const int Matcher::min_vector_size(const BasicType bt) { 1612 int max_size = max_vector_size(bt); 1613 // Min size which can be loaded into vector is 4 bytes. 1614 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1615 return MIN2(size,max_size); 1616 } 1617 1618 // Vector ideal reg corresponding to specified size in bytes 1619 const uint Matcher::vector_ideal_reg(int size) { 1620 assert(MaxVectorSize >= size, ""); 1621 switch(size) { 1622 case 4: return Op_VecS; 1623 case 8: return Op_VecD; 1624 case 16: return Op_VecX; 1625 case 32: return Op_VecY; 1626 case 64: return Op_VecZ; 1627 } 1628 ShouldNotReachHere(); 1629 return 0; 1630 } 1631 1632 // x86 supports misaligned vectors store/load. 1633 const bool Matcher::misaligned_vectors_ok() { 1634 return true; 1635 } 1636 1637 // x86 AES instructions are compatible with SunJCE expanded 1638 // keys, hence we do not need to pass the original key to stubs 1639 const bool Matcher::pass_original_key_for_aes() { 1640 return false; 1641 } 1642 1643 1644 const bool Matcher::convi2l_type_required = true; 1645 1646 // Check for shift by small constant as well 1647 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1648 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1649 shift->in(2)->get_int() <= 3 && 1650 // Are there other uses besides address expressions? 1651 !matcher->is_visited(shift)) { 1652 address_visited.set(shift->_idx); // Flag as address_visited 1653 mstack.push(shift->in(2), Matcher::Visit); 1654 Node *conv = shift->in(1); 1655 #ifdef _LP64 1656 // Allow Matcher to match the rule which bypass 1657 // ConvI2L operation for an array index on LP64 1658 // if the index value is positive. 1659 if (conv->Opcode() == Op_ConvI2L && 1660 conv->as_Type()->type()->is_long()->_lo >= 0 && 1661 // Are there other uses besides address expressions? 1662 !matcher->is_visited(conv)) { 1663 address_visited.set(conv->_idx); // Flag as address_visited 1664 mstack.push(conv->in(1), Matcher::Pre_Visit); 1665 } else 1666 #endif 1667 mstack.push(conv, Matcher::Pre_Visit); 1668 return true; 1669 } 1670 return false; 1671 } 1672 1673 // This function identifies sub-graphs in which a 'load' node is 1674 // input to two different nodes, and such that it can be matched 1675 // with BMI instructions like blsi, blsr, etc. 1676 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 1677 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 1678 // refers to the same node. 1679 // 1680 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 1681 // This is a temporary solution until we make DAGs expressible in ADL. 
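// Concrete example (derived from is_bmi_pattern() below): for the blsi case,
// Java code such as "int b = -a[i] & a[i]" becomes the ideal subgraph
// (AndI (SubI ConI(0) LoadI) LoadI) where both LoadI inputs are the same node,
// and is recognized by bmii.match(Op_AndI, -1, Op_SubI, 1, 0): AndI is
// commutative (-1), SubI must have the constant 0 as its first input, and the
// shared LoadI must be the remaining input on both levels. The blsr pattern
// "x & (x - 1)" and the blsmsk pattern "x ^ (x - 1)" are matched the same way
// through AddI with constant -1.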
1682 template<typename ConType> 1683 class FusedPatternMatcher { 1684 Node* _op1_node; 1685 Node* _mop_node; 1686 int _con_op; 1687 1688 static int match_next(Node* n, int next_op, int next_op_idx) { 1689 if (n->in(1) == NULL || n->in(2) == NULL) { 1690 return -1; 1691 } 1692 1693 if (next_op_idx == -1) { // n is commutative, try rotations 1694 if (n->in(1)->Opcode() == next_op) { 1695 return 1; 1696 } else if (n->in(2)->Opcode() == next_op) { 1697 return 2; 1698 } 1699 } else { 1700 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 1701 if (n->in(next_op_idx)->Opcode() == next_op) { 1702 return next_op_idx; 1703 } 1704 } 1705 return -1; 1706 } 1707 1708 public: 1709 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 1710 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 1711 1712 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 1713 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 1714 typename ConType::NativeType con_value) { 1715 if (_op1_node->Opcode() != op1) { 1716 return false; 1717 } 1718 if (_mop_node->outcnt() > 2) { 1719 return false; 1720 } 1721 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 1722 if (op1_op2_idx == -1) { 1723 return false; 1724 } 1725 // Memory operation must be the other edge 1726 int op1_mop_idx = (op1_op2_idx & 1) + 1; 1727 1728 // Check that the mop node is really what we want 1729 if (_op1_node->in(op1_mop_idx) == _mop_node) { 1730 Node* op2_node = _op1_node->in(op1_op2_idx); 1731 if (op2_node->outcnt() > 1) { 1732 return false; 1733 } 1734 assert(op2_node->Opcode() == op2, "Should be"); 1735 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 1736 if (op2_con_idx == -1) { 1737 return false; 1738 } 1739 // Memory operation must be the other edge 1740 int op2_mop_idx = (op2_con_idx & 1) + 1; 1741 // Check that the memory operation is the same node 1742 if (op2_node->in(op2_mop_idx) == _mop_node) { 1743 // Now check the constant 1744 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 1745 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 1746 return true; 1747 } 1748 } 1749 } 1750 return false; 1751 } 1752 }; 1753 1754 static bool is_bmi_pattern(Node* n, Node* m) { 1755 assert(UseBMI1Instructions, "sanity"); 1756 if (n != NULL && m != NULL) { 1757 if (m->Opcode() == Op_LoadI) { 1758 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 1759 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 1760 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 1761 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 1762 } else if (m->Opcode() == Op_LoadL) { 1763 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 1764 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 1765 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 1766 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 1767 } 1768 } 1769 return false; 1770 } 1771 1772 // Should the matcher clone input 'm' of node 'n'? 1773 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 1774 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 1775 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 1776 mstack.push(m, Visit); 1777 return true; 1778 } 1779 return false; 1780 } 1781 1782 // Should the Matcher clone shifts on addressing modes, expecting them 1783 // to be subsumed into complex addressing expressions or compute them 1784 // into registers? 
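// Yes, where the x86 addressing modes can absorb them. For example an access
// like a[i] whose address is (base + (i << 2) + offset) can be encoded as a
// single memory operand [base + index*4 + disp], so clone_shift() above clones
// small constant shifts (shift count <= 3, i.e. scale 1/2/4/8) into every
// address user instead of materializing the scaled index in a register; on
// LP64 it also lets the matcher bypass ConvI2L for provably non-negative
// array indices.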
1785 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1786 Node *off = m->in(AddPNode::Offset); 1787 if (off->is_Con()) { 1788 address_visited.test_set(m->_idx); // Flag as address_visited 1789 Node *adr = m->in(AddPNode::Address); 1790 1791 // Intel can handle 2 adds in addressing mode 1792 // AtomicAdd is not an addressing expression. 1793 // Cheap to find it by looking for screwy base. 1794 if (adr->is_AddP() && 1795 !adr->in(AddPNode::Base)->is_top() && 1796 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 1797 // Are there other uses besides address expressions? 1798 !is_visited(adr)) { 1799 address_visited.set(adr->_idx); // Flag as address_visited 1800 Node *shift = adr->in(AddPNode::Offset); 1801 if (!clone_shift(shift, this, mstack, address_visited)) { 1802 mstack.push(shift, Pre_Visit); 1803 } 1804 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1805 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1806 } else { 1807 mstack.push(adr, Pre_Visit); 1808 } 1809 1810 // Clone X+offset as it also folds into most addressing expressions 1811 mstack.push(off, Visit); 1812 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1813 return true; 1814 } else if (clone_shift(off, this, mstack, address_visited)) { 1815 address_visited.test_set(m->_idx); // Flag as address_visited 1816 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1817 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1818 return true; 1819 } 1820 return false; 1821 } 1822 1823 void Compile::reshape_address(AddPNode* addp) { 1824 } 1825 1826 static inline uint vector_length(const MachNode* n) { 1827 const TypeVect* vt = n->bottom_type()->is_vect(); 1828 return vt->length(); 1829 } 1830 1831 static inline uint vector_length(const MachNode* use, MachOper* opnd) { 1832 uint def_idx = use->operand_index(opnd); 1833 Node* def = use->in(def_idx); 1834 return def->bottom_type()->is_vect()->length(); 1835 } 1836 1837 static inline uint vector_length_in_bytes(const MachNode* n) { 1838 const TypeVect* vt = n->bottom_type()->is_vect(); 1839 return vt->length_in_bytes(); 1840 } 1841 1842 static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { 1843 uint def_idx = use->operand_index(opnd); 1844 Node* def = use->in(def_idx); 1845 return def->bottom_type()->is_vect()->length_in_bytes(); 1846 } 1847 1848 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) { 1849 switch(vector_length_in_bytes(n)) { 1850 case 4: // fall-through 1851 case 8: // fall-through 1852 case 16: return Assembler::AVX_128bit; 1853 case 32: return Assembler::AVX_256bit; 1854 case 64: return Assembler::AVX_512bit; 1855 1856 default: { 1857 ShouldNotReachHere(); 1858 return Assembler::AVX_NoVec; 1859 } 1860 } 1861 } 1862 1863 // Helper methods for MachSpillCopyNode::implementation(). 1864 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1865 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1866 // In 64-bit VM size calculation is very complex. Emitting instructions 1867 // into scratch buffer is used to get size in 64-bit VM. 
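// (On 64-bit, instruction length depends on REX/VEX/EVEX prefixes and operand
// encodings, so callers measure the emitted bytes in a scratch buffer instead
// of asking this helper for a size; the assert below enforces that do_size is
// only used by the 32-bit VM, where the fixed size estimates at the end of the
// function apply.)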
1868 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1869 assert(ireg == Op_VecS || // 32bit vector 1870 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1871 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1872 "no non-adjacent vector moves" ); 1873 if (cbuf) { 1874 C2_MacroAssembler _masm(cbuf); 1875 int offset = __ offset(); 1876 switch (ireg) { 1877 case Op_VecS: // copy whole register 1878 case Op_VecD: 1879 case Op_VecX: 1880 #ifndef _LP64 1881 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1882 #else 1883 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1884 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1885 } else { 1886 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1887 } 1888 #endif 1889 break; 1890 case Op_VecY: 1891 #ifndef _LP64 1892 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1893 #else 1894 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1895 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1896 } else { 1897 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1898 } 1899 #endif 1900 break; 1901 case Op_VecZ: 1902 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1903 break; 1904 default: 1905 ShouldNotReachHere(); 1906 } 1907 int size = __ offset() - offset; 1908 #ifdef ASSERT 1909 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1910 assert(!do_size || size == 4, "incorrect size calculattion"); 1911 #endif 1912 return size; 1913 #ifndef PRODUCT 1914 } else if (!do_size) { 1915 switch (ireg) { 1916 case Op_VecS: 1917 case Op_VecD: 1918 case Op_VecX: 1919 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1920 break; 1921 case Op_VecY: 1922 case Op_VecZ: 1923 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1924 break; 1925 default: 1926 ShouldNotReachHere(); 1927 } 1928 #endif 1929 } 1930 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1931 return (UseAVX > 2) ? 6 : 4; 1932 } 1933 1934 int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1935 int stack_offset, int reg, uint ireg, outputStream* st) { 1936 // In 64-bit VM size calculation is very complex. Emitting instructions 1937 // into scratch buffer is used to get size in 64-bit VM. 
1938 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1939 if (cbuf) { 1940 C2_MacroAssembler _masm(cbuf); 1941 int offset = __ offset(); 1942 if (is_load) { 1943 switch (ireg) { 1944 case Op_VecS: 1945 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1946 break; 1947 case Op_VecD: 1948 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1949 break; 1950 case Op_VecX: 1951 #ifndef _LP64 1952 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1953 #else 1954 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1955 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1956 } else { 1957 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1958 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1959 } 1960 #endif 1961 break; 1962 case Op_VecY: 1963 #ifndef _LP64 1964 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1965 #else 1966 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1967 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1968 } else { 1969 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1970 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1971 } 1972 #endif 1973 break; 1974 case Op_VecZ: 1975 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1976 break; 1977 default: 1978 ShouldNotReachHere(); 1979 } 1980 } else { // store 1981 switch (ireg) { 1982 case Op_VecS: 1983 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1984 break; 1985 case Op_VecD: 1986 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1987 break; 1988 case Op_VecX: 1989 #ifndef _LP64 1990 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1991 #else 1992 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1993 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1994 } 1995 else { 1996 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1997 } 1998 #endif 1999 break; 2000 case Op_VecY: 2001 #ifndef _LP64 2002 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2003 #else 2004 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2005 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2006 } 2007 else { 2008 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2009 } 2010 #endif 2011 break; 2012 case Op_VecZ: 2013 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2014 break; 2015 default: 2016 ShouldNotReachHere(); 2017 } 2018 } 2019 int size = __ offset() - offset; 2020 #ifdef ASSERT 2021 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 2022 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
2023 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 2024 #endif 2025 return size; 2026 #ifndef PRODUCT 2027 } else if (!do_size) { 2028 if (is_load) { 2029 switch (ireg) { 2030 case Op_VecS: 2031 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2032 break; 2033 case Op_VecD: 2034 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2035 break; 2036 case Op_VecX: 2037 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2038 break; 2039 case Op_VecY: 2040 case Op_VecZ: 2041 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2042 break; 2043 default: 2044 ShouldNotReachHere(); 2045 } 2046 } else { // store 2047 switch (ireg) { 2048 case Op_VecS: 2049 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2050 break; 2051 case Op_VecD: 2052 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2053 break; 2054 case Op_VecX: 2055 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2056 break; 2057 case Op_VecY: 2058 case Op_VecZ: 2059 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2060 break; 2061 default: 2062 ShouldNotReachHere(); 2063 } 2064 } 2065 #endif 2066 } 2067 bool is_single_byte = false; 2068 int vec_len = 0; 2069 if ((UseAVX > 2) && (stack_offset != 0)) { 2070 int tuple_type = Assembler::EVEX_FVM; 2071 int input_size = Assembler::EVEX_32bit; 2072 switch (ireg) { 2073 case Op_VecS: 2074 tuple_type = Assembler::EVEX_T1S; 2075 break; 2076 case Op_VecD: 2077 tuple_type = Assembler::EVEX_T1S; 2078 input_size = Assembler::EVEX_64bit; 2079 break; 2080 case Op_VecX: 2081 break; 2082 case Op_VecY: 2083 vec_len = 1; 2084 break; 2085 case Op_VecZ: 2086 vec_len = 2; 2087 break; 2088 } 2089 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2090 } 2091 int offset_size = 0; 2092 int size = 5; 2093 if (UseAVX > 2 ) { 2094 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2095 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2096 size += 2; // Need an additional two bytes for EVEX encoding 2097 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2098 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2099 } else { 2100 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2101 size += 2; // Need an additional two bytes for EVEX encodding 2102 } 2103 } else { 2104 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2105 } 2106 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2107 return size+offset_size; 2108 } 2109 2110 static inline jint replicate4_imm(int con, int width) { 2111 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2112 assert(width == 1 || width == 2, "only byte or short types here"); 2113 int bit_width = width * 8; 2114 jint val = con; 2115 val &= (1 << bit_width) - 1; // mask off sign bits 2116 while(bit_width < 32) { 2117 val |= (val << bit_width); 2118 bit_width <<= 1; 2119 } 2120 return val; 2121 } 2122 2123 static inline jlong replicate8_imm(int con, int width) { 2124 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 
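// Worked example (illustrative): replicate8_imm(0x1F, 1) masks the constant to
// one byte and doubles it until the 64-bit value is full:
//   0x1F -> 0x1F1F -> 0x1F1F1F1F -> 0x1F1F1F1F1F1F1F1F
// Negative inputs behave the same once the sign bits are masked off, e.g.
// replicate4_imm(-1, 2) above yields 0xFFFFFFFF.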
2125 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2126 int bit_width = width * 8; 2127 jlong val = con; 2128 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2129 while(bit_width < 64) { 2130 val |= (val << bit_width); 2131 bit_width <<= 1; 2132 } 2133 return val; 2134 } 2135 2136 #ifndef PRODUCT 2137 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2138 st->print("nop \t# %d bytes pad for loops and calls", _count); 2139 } 2140 #endif 2141 2142 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2143 C2_MacroAssembler _masm(&cbuf); 2144 __ nop(_count); 2145 } 2146 2147 uint MachNopNode::size(PhaseRegAlloc*) const { 2148 return _count; 2149 } 2150 2151 #ifndef PRODUCT 2152 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2153 st->print("# breakpoint"); 2154 } 2155 #endif 2156 2157 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2158 C2_MacroAssembler _masm(&cbuf); 2159 __ int3(); 2160 } 2161 2162 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2163 return MachNode::size(ra_); 2164 } 2165 2166 %} 2167 2168 encode %{ 2169 2170 enc_class call_epilog %{ 2171 if (VerifyStackAtCalls) { 2172 // Check that stack depth is unchanged: find majik cookie on stack 2173 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2174 C2_MacroAssembler _masm(&cbuf); 2175 Label L; 2176 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2177 __ jccb(Assembler::equal, L); 2178 // Die if stack mismatch 2179 __ int3(); 2180 __ bind(L); 2181 } 2182 %} 2183 2184 %} 2185 2186 2187 //----------OPERANDS----------------------------------------------------------- 2188 // Operand definitions must precede instruction definitions for correct parsing 2189 // in the ADLC because operands constitute user defined types which are used in 2190 // instruction definitions. 2191 2192 // Vectors 2193 2194 // Dummy generic vector class. Should be used for all vector operands. 2195 // Replaced with vec[SDXYZ] during post-selection pass. 2196 operand vec() %{ 2197 constraint(ALLOC_IN_RC(dynamic)); 2198 match(VecX); 2199 match(VecY); 2200 match(VecZ); 2201 match(VecS); 2202 match(VecD); 2203 2204 format %{ %} 2205 interface(REG_INTER); 2206 %} 2207 2208 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2209 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2210 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2211 // runtime code generation via reg_class_dynamic. 2212 operand legVec() %{ 2213 constraint(ALLOC_IN_RC(dynamic)); 2214 match(VecX); 2215 match(VecY); 2216 match(VecZ); 2217 match(VecS); 2218 match(VecD); 2219 2220 format %{ %} 2221 interface(REG_INTER); 2222 %} 2223 2224 // Replaces vec during post-selection cleanup. See above. 2225 operand vecS() %{ 2226 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2227 match(VecS); 2228 2229 format %{ %} 2230 interface(REG_INTER); 2231 %} 2232 2233 // Replaces legVec during post-selection cleanup. See above. 2234 operand legVecS() %{ 2235 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2236 match(VecS); 2237 2238 format %{ %} 2239 interface(REG_INTER); 2240 %} 2241 2242 // Replaces vec during post-selection cleanup. See above. 
2243 operand vecD() %{ 2244 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2245 match(VecD); 2246 2247 format %{ %} 2248 interface(REG_INTER); 2249 %} 2250 2251 // Replaces legVec during post-selection cleanup. See above. 2252 operand legVecD() %{ 2253 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2254 match(VecD); 2255 2256 format %{ %} 2257 interface(REG_INTER); 2258 %} 2259 2260 // Replaces vec during post-selection cleanup. See above. 2261 operand vecX() %{ 2262 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2263 match(VecX); 2264 2265 format %{ %} 2266 interface(REG_INTER); 2267 %} 2268 2269 // Replaces legVec during post-selection cleanup. See above. 2270 operand legVecX() %{ 2271 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2272 match(VecX); 2273 2274 format %{ %} 2275 interface(REG_INTER); 2276 %} 2277 2278 // Replaces vec during post-selection cleanup. See above. 2279 operand vecY() %{ 2280 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2281 match(VecY); 2282 2283 format %{ %} 2284 interface(REG_INTER); 2285 %} 2286 2287 // Replaces legVec during post-selection cleanup. See above. 2288 operand legVecY() %{ 2289 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2290 match(VecY); 2291 2292 format %{ %} 2293 interface(REG_INTER); 2294 %} 2295 2296 // Replaces vec during post-selection cleanup. See above. 2297 operand vecZ() %{ 2298 constraint(ALLOC_IN_RC(vectorz_reg)); 2299 match(VecZ); 2300 2301 format %{ %} 2302 interface(REG_INTER); 2303 %} 2304 2305 // Replaces legVec during post-selection cleanup. See above. 2306 operand legVecZ() %{ 2307 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2308 match(VecZ); 2309 2310 format %{ %} 2311 interface(REG_INTER); 2312 %} 2313 2314 // Comparison Code for FP conditional move 2315 operand cmpOp_vcmppd() %{ 2316 match(Bool); 2317 2318 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2319 n->as_Bool()->_test._test != BoolTest::no_overflow); 2320 format %{ "" %} 2321 interface(COND_INTER) %{ 2322 equal (0x0, "eq"); 2323 less (0x1, "lt"); 2324 less_equal (0x2, "le"); 2325 not_equal (0xC, "ne"); 2326 greater_equal(0xD, "ge"); 2327 greater (0xE, "gt"); 2328 //TODO cannot compile (adlc breaks) without two next lines with error: 2329 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2330 // equal' for overflow. 
2331 overflow (0x20, "o"); // not really supported by the instruction 2332 no_overflow (0x21, "no"); // not really supported by the instruction 2333 %} 2334 %} 2335 2336 2337 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2338 2339 // ============================================================================ 2340 2341 instruct ShouldNotReachHere() %{ 2342 match(Halt); 2343 format %{ "stop\t# ShouldNotReachHere" %} 2344 ins_encode %{ 2345 if (is_reachable()) { 2346 __ stop(_halt_reason); 2347 } 2348 %} 2349 ins_pipe(pipe_slow); 2350 %} 2351 2352 // =================================EVEX special=============================== 2353 2354 instruct setMask(rRegI dst, rRegI src) %{ 2355 predicate(Matcher::has_predicated_vectors()); 2356 match(Set dst (SetVectMaskI src)); 2357 effect(TEMP dst); 2358 format %{ "setvectmask $dst, $src" %} 2359 ins_encode %{ 2360 __ setvectmask($dst$$Register, $src$$Register); 2361 %} 2362 ins_pipe(pipe_slow); 2363 %} 2364 2365 // ============================================================================ 2366 2367 instruct addF_reg(regF dst, regF src) %{ 2368 predicate((UseSSE>=1) && (UseAVX == 0)); 2369 match(Set dst (AddF dst src)); 2370 2371 format %{ "addss $dst, $src" %} 2372 ins_cost(150); 2373 ins_encode %{ 2374 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2375 %} 2376 ins_pipe(pipe_slow); 2377 %} 2378 2379 instruct addF_mem(regF dst, memory src) %{ 2380 predicate((UseSSE>=1) && (UseAVX == 0)); 2381 match(Set dst (AddF dst (LoadF src))); 2382 2383 format %{ "addss $dst, $src" %} 2384 ins_cost(150); 2385 ins_encode %{ 2386 __ addss($dst$$XMMRegister, $src$$Address); 2387 %} 2388 ins_pipe(pipe_slow); 2389 %} 2390 2391 instruct addF_imm(regF dst, immF con) %{ 2392 predicate((UseSSE>=1) && (UseAVX == 0)); 2393 match(Set dst (AddF dst con)); 2394 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2395 ins_cost(150); 2396 ins_encode %{ 2397 __ addss($dst$$XMMRegister, $constantaddress($con)); 2398 %} 2399 ins_pipe(pipe_slow); 2400 %} 2401 2402 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2403 predicate(UseAVX > 0); 2404 match(Set dst (AddF src1 src2)); 2405 2406 format %{ "vaddss $dst, $src1, $src2" %} 2407 ins_cost(150); 2408 ins_encode %{ 2409 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2410 %} 2411 ins_pipe(pipe_slow); 2412 %} 2413 2414 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2415 predicate(UseAVX > 0); 2416 match(Set dst (AddF src1 (LoadF src2))); 2417 2418 format %{ "vaddss $dst, $src1, $src2" %} 2419 ins_cost(150); 2420 ins_encode %{ 2421 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2422 %} 2423 ins_pipe(pipe_slow); 2424 %} 2425 2426 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2427 predicate(UseAVX > 0); 2428 match(Set dst (AddF src con)); 2429 2430 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2431 ins_cost(150); 2432 ins_encode %{ 2433 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2434 %} 2435 ins_pipe(pipe_slow); 2436 %} 2437 2438 instruct addD_reg(regD dst, regD src) %{ 2439 predicate((UseSSE>=2) && (UseAVX == 0)); 2440 match(Set dst (AddD dst src)); 2441 2442 format %{ "addsd $dst, $src" %} 2443 ins_cost(150); 2444 ins_encode %{ 2445 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2446 %} 2447 ins_pipe(pipe_slow); 2448 %} 2449 2450 instruct addD_mem(regD dst, memory src) %{ 2451 predicate((UseSSE>=2) && (UseAVX == 0)); 2452 
match(Set dst (AddD dst (LoadD src))); 2453 2454 format %{ "addsd $dst, $src" %} 2455 ins_cost(150); 2456 ins_encode %{ 2457 __ addsd($dst$$XMMRegister, $src$$Address); 2458 %} 2459 ins_pipe(pipe_slow); 2460 %} 2461 2462 instruct addD_imm(regD dst, immD con) %{ 2463 predicate((UseSSE>=2) && (UseAVX == 0)); 2464 match(Set dst (AddD dst con)); 2465 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2466 ins_cost(150); 2467 ins_encode %{ 2468 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2469 %} 2470 ins_pipe(pipe_slow); 2471 %} 2472 2473 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2474 predicate(UseAVX > 0); 2475 match(Set dst (AddD src1 src2)); 2476 2477 format %{ "vaddsd $dst, $src1, $src2" %} 2478 ins_cost(150); 2479 ins_encode %{ 2480 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2481 %} 2482 ins_pipe(pipe_slow); 2483 %} 2484 2485 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2486 predicate(UseAVX > 0); 2487 match(Set dst (AddD src1 (LoadD src2))); 2488 2489 format %{ "vaddsd $dst, $src1, $src2" %} 2490 ins_cost(150); 2491 ins_encode %{ 2492 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2493 %} 2494 ins_pipe(pipe_slow); 2495 %} 2496 2497 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2498 predicate(UseAVX > 0); 2499 match(Set dst (AddD src con)); 2500 2501 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2502 ins_cost(150); 2503 ins_encode %{ 2504 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2505 %} 2506 ins_pipe(pipe_slow); 2507 %} 2508 2509 instruct subF_reg(regF dst, regF src) %{ 2510 predicate((UseSSE>=1) && (UseAVX == 0)); 2511 match(Set dst (SubF dst src)); 2512 2513 format %{ "subss $dst, $src" %} 2514 ins_cost(150); 2515 ins_encode %{ 2516 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2517 %} 2518 ins_pipe(pipe_slow); 2519 %} 2520 2521 instruct subF_mem(regF dst, memory src) %{ 2522 predicate((UseSSE>=1) && (UseAVX == 0)); 2523 match(Set dst (SubF dst (LoadF src))); 2524 2525 format %{ "subss $dst, $src" %} 2526 ins_cost(150); 2527 ins_encode %{ 2528 __ subss($dst$$XMMRegister, $src$$Address); 2529 %} 2530 ins_pipe(pipe_slow); 2531 %} 2532 2533 instruct subF_imm(regF dst, immF con) %{ 2534 predicate((UseSSE>=1) && (UseAVX == 0)); 2535 match(Set dst (SubF dst con)); 2536 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2537 ins_cost(150); 2538 ins_encode %{ 2539 __ subss($dst$$XMMRegister, $constantaddress($con)); 2540 %} 2541 ins_pipe(pipe_slow); 2542 %} 2543 2544 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2545 predicate(UseAVX > 0); 2546 match(Set dst (SubF src1 src2)); 2547 2548 format %{ "vsubss $dst, $src1, $src2" %} 2549 ins_cost(150); 2550 ins_encode %{ 2551 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2552 %} 2553 ins_pipe(pipe_slow); 2554 %} 2555 2556 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2557 predicate(UseAVX > 0); 2558 match(Set dst (SubF src1 (LoadF src2))); 2559 2560 format %{ "vsubss $dst, $src1, $src2" %} 2561 ins_cost(150); 2562 ins_encode %{ 2563 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2564 %} 2565 ins_pipe(pipe_slow); 2566 %} 2567 2568 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2569 predicate(UseAVX > 0); 2570 match(Set dst (SubF src con)); 2571 2572 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" 
%} 2573 ins_cost(150); 2574 ins_encode %{ 2575 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2576 %} 2577 ins_pipe(pipe_slow); 2578 %} 2579 2580 instruct subD_reg(regD dst, regD src) %{ 2581 predicate((UseSSE>=2) && (UseAVX == 0)); 2582 match(Set dst (SubD dst src)); 2583 2584 format %{ "subsd $dst, $src" %} 2585 ins_cost(150); 2586 ins_encode %{ 2587 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2588 %} 2589 ins_pipe(pipe_slow); 2590 %} 2591 2592 instruct subD_mem(regD dst, memory src) %{ 2593 predicate((UseSSE>=2) && (UseAVX == 0)); 2594 match(Set dst (SubD dst (LoadD src))); 2595 2596 format %{ "subsd $dst, $src" %} 2597 ins_cost(150); 2598 ins_encode %{ 2599 __ subsd($dst$$XMMRegister, $src$$Address); 2600 %} 2601 ins_pipe(pipe_slow); 2602 %} 2603 2604 instruct subD_imm(regD dst, immD con) %{ 2605 predicate((UseSSE>=2) && (UseAVX == 0)); 2606 match(Set dst (SubD dst con)); 2607 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2608 ins_cost(150); 2609 ins_encode %{ 2610 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2611 %} 2612 ins_pipe(pipe_slow); 2613 %} 2614 2615 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2616 predicate(UseAVX > 0); 2617 match(Set dst (SubD src1 src2)); 2618 2619 format %{ "vsubsd $dst, $src1, $src2" %} 2620 ins_cost(150); 2621 ins_encode %{ 2622 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2623 %} 2624 ins_pipe(pipe_slow); 2625 %} 2626 2627 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2628 predicate(UseAVX > 0); 2629 match(Set dst (SubD src1 (LoadD src2))); 2630 2631 format %{ "vsubsd $dst, $src1, $src2" %} 2632 ins_cost(150); 2633 ins_encode %{ 2634 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2635 %} 2636 ins_pipe(pipe_slow); 2637 %} 2638 2639 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2640 predicate(UseAVX > 0); 2641 match(Set dst (SubD src con)); 2642 2643 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2644 ins_cost(150); 2645 ins_encode %{ 2646 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2647 %} 2648 ins_pipe(pipe_slow); 2649 %} 2650 2651 instruct mulF_reg(regF dst, regF src) %{ 2652 predicate((UseSSE>=1) && (UseAVX == 0)); 2653 match(Set dst (MulF dst src)); 2654 2655 format %{ "mulss $dst, $src" %} 2656 ins_cost(150); 2657 ins_encode %{ 2658 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2659 %} 2660 ins_pipe(pipe_slow); 2661 %} 2662 2663 instruct mulF_mem(regF dst, memory src) %{ 2664 predicate((UseSSE>=1) && (UseAVX == 0)); 2665 match(Set dst (MulF dst (LoadF src))); 2666 2667 format %{ "mulss $dst, $src" %} 2668 ins_cost(150); 2669 ins_encode %{ 2670 __ mulss($dst$$XMMRegister, $src$$Address); 2671 %} 2672 ins_pipe(pipe_slow); 2673 %} 2674 2675 instruct mulF_imm(regF dst, immF con) %{ 2676 predicate((UseSSE>=1) && (UseAVX == 0)); 2677 match(Set dst (MulF dst con)); 2678 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2679 ins_cost(150); 2680 ins_encode %{ 2681 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2682 %} 2683 ins_pipe(pipe_slow); 2684 %} 2685 2686 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2687 predicate(UseAVX > 0); 2688 match(Set dst (MulF src1 src2)); 2689 2690 format %{ "vmulss $dst, $src1, $src2" %} 2691 ins_cost(150); 2692 ins_encode %{ 2693 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2694 %} 2695 ins_pipe(pipe_slow); 2696 %} 
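// Note on the pattern used throughout this block: the SSE forms (predicate
// UseAVX == 0) match destructive two-operand rules such as
// "match(Set dst (MulF dst src))" because mulss/addss/etc. overwrite their
// first operand, while the AVX forms match three-operand rules
// "match(Set dst (MulF src1 src2))" since the VEX encoding
// (vmulss dst, src1, src2) is non-destructive and leaves the register
// allocator free to place dst independently of src1. The *_imm variants read
// their constant from the constant table via $constantaddress, and the *_mem
// variants fold a LoadF/LoadD directly into the instruction's memory operand.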
2697 2698 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2699 predicate(UseAVX > 0); 2700 match(Set dst (MulF src1 (LoadF src2))); 2701 2702 format %{ "vmulss $dst, $src1, $src2" %} 2703 ins_cost(150); 2704 ins_encode %{ 2705 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2706 %} 2707 ins_pipe(pipe_slow); 2708 %} 2709 2710 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2711 predicate(UseAVX > 0); 2712 match(Set dst (MulF src con)); 2713 2714 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2715 ins_cost(150); 2716 ins_encode %{ 2717 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2718 %} 2719 ins_pipe(pipe_slow); 2720 %} 2721 2722 instruct mulD_reg(regD dst, regD src) %{ 2723 predicate((UseSSE>=2) && (UseAVX == 0)); 2724 match(Set dst (MulD dst src)); 2725 2726 format %{ "mulsd $dst, $src" %} 2727 ins_cost(150); 2728 ins_encode %{ 2729 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2730 %} 2731 ins_pipe(pipe_slow); 2732 %} 2733 2734 instruct mulD_mem(regD dst, memory src) %{ 2735 predicate((UseSSE>=2) && (UseAVX == 0)); 2736 match(Set dst (MulD dst (LoadD src))); 2737 2738 format %{ "mulsd $dst, $src" %} 2739 ins_cost(150); 2740 ins_encode %{ 2741 __ mulsd($dst$$XMMRegister, $src$$Address); 2742 %} 2743 ins_pipe(pipe_slow); 2744 %} 2745 2746 instruct mulD_imm(regD dst, immD con) %{ 2747 predicate((UseSSE>=2) && (UseAVX == 0)); 2748 match(Set dst (MulD dst con)); 2749 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2750 ins_cost(150); 2751 ins_encode %{ 2752 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2753 %} 2754 ins_pipe(pipe_slow); 2755 %} 2756 2757 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2758 predicate(UseAVX > 0); 2759 match(Set dst (MulD src1 src2)); 2760 2761 format %{ "vmulsd $dst, $src1, $src2" %} 2762 ins_cost(150); 2763 ins_encode %{ 2764 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2765 %} 2766 ins_pipe(pipe_slow); 2767 %} 2768 2769 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2770 predicate(UseAVX > 0); 2771 match(Set dst (MulD src1 (LoadD src2))); 2772 2773 format %{ "vmulsd $dst, $src1, $src2" %} 2774 ins_cost(150); 2775 ins_encode %{ 2776 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2777 %} 2778 ins_pipe(pipe_slow); 2779 %} 2780 2781 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2782 predicate(UseAVX > 0); 2783 match(Set dst (MulD src con)); 2784 2785 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2786 ins_cost(150); 2787 ins_encode %{ 2788 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2789 %} 2790 ins_pipe(pipe_slow); 2791 %} 2792 2793 instruct divF_reg(regF dst, regF src) %{ 2794 predicate((UseSSE>=1) && (UseAVX == 0)); 2795 match(Set dst (DivF dst src)); 2796 2797 format %{ "divss $dst, $src" %} 2798 ins_cost(150); 2799 ins_encode %{ 2800 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2801 %} 2802 ins_pipe(pipe_slow); 2803 %} 2804 2805 instruct divF_mem(regF dst, memory src) %{ 2806 predicate((UseSSE>=1) && (UseAVX == 0)); 2807 match(Set dst (DivF dst (LoadF src))); 2808 2809 format %{ "divss $dst, $src" %} 2810 ins_cost(150); 2811 ins_encode %{ 2812 __ divss($dst$$XMMRegister, $src$$Address); 2813 %} 2814 ins_pipe(pipe_slow); 2815 %} 2816 2817 instruct divF_imm(regF dst, immF con) %{ 2818 predicate((UseSSE>=1) && (UseAVX == 0)); 2819 match(Set dst (DivF dst 
con)); 2820 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2821 ins_cost(150); 2822 ins_encode %{ 2823 __ divss($dst$$XMMRegister, $constantaddress($con)); 2824 %} 2825 ins_pipe(pipe_slow); 2826 %} 2827 2828 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2829 predicate(UseAVX > 0); 2830 match(Set dst (DivF src1 src2)); 2831 2832 format %{ "vdivss $dst, $src1, $src2" %} 2833 ins_cost(150); 2834 ins_encode %{ 2835 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2836 %} 2837 ins_pipe(pipe_slow); 2838 %} 2839 2840 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2841 predicate(UseAVX > 0); 2842 match(Set dst (DivF src1 (LoadF src2))); 2843 2844 format %{ "vdivss $dst, $src1, $src2" %} 2845 ins_cost(150); 2846 ins_encode %{ 2847 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2848 %} 2849 ins_pipe(pipe_slow); 2850 %} 2851 2852 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2853 predicate(UseAVX > 0); 2854 match(Set dst (DivF src con)); 2855 2856 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2857 ins_cost(150); 2858 ins_encode %{ 2859 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2860 %} 2861 ins_pipe(pipe_slow); 2862 %} 2863 2864 instruct divD_reg(regD dst, regD src) %{ 2865 predicate((UseSSE>=2) && (UseAVX == 0)); 2866 match(Set dst (DivD dst src)); 2867 2868 format %{ "divsd $dst, $src" %} 2869 ins_cost(150); 2870 ins_encode %{ 2871 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2872 %} 2873 ins_pipe(pipe_slow); 2874 %} 2875 2876 instruct divD_mem(regD dst, memory src) %{ 2877 predicate((UseSSE>=2) && (UseAVX == 0)); 2878 match(Set dst (DivD dst (LoadD src))); 2879 2880 format %{ "divsd $dst, $src" %} 2881 ins_cost(150); 2882 ins_encode %{ 2883 __ divsd($dst$$XMMRegister, $src$$Address); 2884 %} 2885 ins_pipe(pipe_slow); 2886 %} 2887 2888 instruct divD_imm(regD dst, immD con) %{ 2889 predicate((UseSSE>=2) && (UseAVX == 0)); 2890 match(Set dst (DivD dst con)); 2891 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2892 ins_cost(150); 2893 ins_encode %{ 2894 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2895 %} 2896 ins_pipe(pipe_slow); 2897 %} 2898 2899 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2900 predicate(UseAVX > 0); 2901 match(Set dst (DivD src1 src2)); 2902 2903 format %{ "vdivsd $dst, $src1, $src2" %} 2904 ins_cost(150); 2905 ins_encode %{ 2906 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2907 %} 2908 ins_pipe(pipe_slow); 2909 %} 2910 2911 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2912 predicate(UseAVX > 0); 2913 match(Set dst (DivD src1 (LoadD src2))); 2914 2915 format %{ "vdivsd $dst, $src1, $src2" %} 2916 ins_cost(150); 2917 ins_encode %{ 2918 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2919 %} 2920 ins_pipe(pipe_slow); 2921 %} 2922 2923 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2924 predicate(UseAVX > 0); 2925 match(Set dst (DivD src con)); 2926 2927 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2928 ins_cost(150); 2929 ins_encode %{ 2930 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2931 %} 2932 ins_pipe(pipe_slow); 2933 %} 2934 2935 instruct absF_reg(regF dst) %{ 2936 predicate((UseSSE>=1) && (UseAVX == 0)); 2937 match(Set dst (AbsF dst)); 2938 ins_cost(150); 2939 format %{ "andps $dst, 
[0x7fffffff]\t# abs float by sign masking" %} 2940 ins_encode %{ 2941 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2942 %} 2943 ins_pipe(pipe_slow); 2944 %} 2945 2946 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2947 predicate(UseAVX > 0); 2948 match(Set dst (AbsF src)); 2949 ins_cost(150); 2950 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2951 ins_encode %{ 2952 int vector_len = 0; 2953 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2954 ExternalAddress(float_signmask()), vector_len); 2955 %} 2956 ins_pipe(pipe_slow); 2957 %} 2958 2959 instruct absD_reg(regD dst) %{ 2960 predicate((UseSSE>=2) && (UseAVX == 0)); 2961 match(Set dst (AbsD dst)); 2962 ins_cost(150); 2963 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2964 "# abs double by sign masking" %} 2965 ins_encode %{ 2966 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2967 %} 2968 ins_pipe(pipe_slow); 2969 %} 2970 2971 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2972 predicate(UseAVX > 0); 2973 match(Set dst (AbsD src)); 2974 ins_cost(150); 2975 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2976 "# abs double by sign masking" %} 2977 ins_encode %{ 2978 int vector_len = 0; 2979 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2980 ExternalAddress(double_signmask()), vector_len); 2981 %} 2982 ins_pipe(pipe_slow); 2983 %} 2984 2985 instruct negF_reg(regF dst) %{ 2986 predicate((UseSSE>=1) && (UseAVX == 0)); 2987 match(Set dst (NegF dst)); 2988 ins_cost(150); 2989 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2990 ins_encode %{ 2991 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2992 %} 2993 ins_pipe(pipe_slow); 2994 %} 2995 2996 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2997 predicate(UseAVX > 0); 2998 match(Set dst (NegF src)); 2999 ins_cost(150); 3000 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3001 ins_encode %{ 3002 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3003 ExternalAddress(float_signflip())); 3004 %} 3005 ins_pipe(pipe_slow); 3006 %} 3007 3008 instruct negD_reg(regD dst) %{ 3009 predicate((UseSSE>=2) && (UseAVX == 0)); 3010 match(Set dst (NegD dst)); 3011 ins_cost(150); 3012 format %{ "xorpd $dst, [0x8000000000000000]\t" 3013 "# neg double by sign flipping" %} 3014 ins_encode %{ 3015 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3016 %} 3017 ins_pipe(pipe_slow); 3018 %} 3019 3020 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3021 predicate(UseAVX > 0); 3022 match(Set dst (NegD src)); 3023 ins_cost(150); 3024 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3025 "# neg double by sign flipping" %} 3026 ins_encode %{ 3027 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3028 ExternalAddress(double_signflip())); 3029 %} 3030 ins_pipe(pipe_slow); 3031 %} 3032 3033 instruct sqrtF_reg(regF dst, regF src) %{ 3034 predicate(UseSSE>=1); 3035 match(Set dst (SqrtF src)); 3036 3037 format %{ "sqrtss $dst, $src" %} 3038 ins_cost(150); 3039 ins_encode %{ 3040 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 3041 %} 3042 ins_pipe(pipe_slow); 3043 %} 3044 3045 instruct sqrtF_mem(regF dst, memory src) %{ 3046 predicate(UseSSE>=1); 3047 match(Set dst (SqrtF (LoadF src))); 3048 3049 format %{ "sqrtss $dst, $src" %} 3050 ins_cost(150); 3051 ins_encode %{ 3052 __ sqrtss($dst$$XMMRegister, $src$$Address); 3053 %} 3054 ins_pipe(pipe_slow); 3055 %} 3056 3057 instruct sqrtF_imm(regF dst, immF con) %{ 3058 predicate(UseSSE>=1); 3059 match(Set dst 
(SqrtF con)); 3060 3061 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3062 ins_cost(150); 3063 ins_encode %{ 3064 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 3065 %} 3066 ins_pipe(pipe_slow); 3067 %} 3068 3069 instruct sqrtD_reg(regD dst, regD src) %{ 3070 predicate(UseSSE>=2); 3071 match(Set dst (SqrtD src)); 3072 3073 format %{ "sqrtsd $dst, $src" %} 3074 ins_cost(150); 3075 ins_encode %{ 3076 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 3077 %} 3078 ins_pipe(pipe_slow); 3079 %} 3080 3081 instruct sqrtD_mem(regD dst, memory src) %{ 3082 predicate(UseSSE>=2); 3083 match(Set dst (SqrtD (LoadD src))); 3084 3085 format %{ "sqrtsd $dst, $src" %} 3086 ins_cost(150); 3087 ins_encode %{ 3088 __ sqrtsd($dst$$XMMRegister, $src$$Address); 3089 %} 3090 ins_pipe(pipe_slow); 3091 %} 3092 3093 instruct sqrtD_imm(regD dst, immD con) %{ 3094 predicate(UseSSE>=2); 3095 match(Set dst (SqrtD con)); 3096 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3097 ins_cost(150); 3098 ins_encode %{ 3099 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 3100 %} 3101 ins_pipe(pipe_slow); 3102 %} 3103 3104 3105 #ifdef _LP64 3106 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3107 match(Set dst (RoundDoubleMode src rmode)); 3108 format %{ "roundsd $dst,$src" %} 3109 ins_cost(150); 3110 ins_encode %{ 3111 assert(UseSSE >= 4, "required"); 3112 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3113 %} 3114 ins_pipe(pipe_slow); 3115 %} 3116 3117 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3118 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3119 format %{ "roundsd $dst,$src" %} 3120 ins_cost(150); 3121 ins_encode %{ 3122 assert(UseSSE >= 4, "required"); 3123 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3124 %} 3125 ins_pipe(pipe_slow); 3126 %} 3127 3128 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 3129 match(Set dst (RoundDoubleMode con rmode)); 3130 effect(TEMP scratch_reg); 3131 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3132 ins_cost(150); 3133 ins_encode %{ 3134 assert(UseSSE >= 4, "required"); 3135 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 3136 %} 3137 ins_pipe(pipe_slow); 3138 %} 3139 3140 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3141 predicate(n->as_Vector()->length() < 8); 3142 match(Set dst (RoundDoubleModeV src rmode)); 3143 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3144 ins_encode %{ 3145 assert(UseAVX > 0, "required"); 3146 int vector_len = vector_length_encoding(this); 3147 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len); 3148 %} 3149 ins_pipe( pipe_slow ); 3150 %} 3151 3152 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3153 predicate(n->as_Vector()->length() == 8); 3154 match(Set dst (RoundDoubleModeV src rmode)); 3155 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3156 ins_encode %{ 3157 assert(UseAVX > 2, "required"); 3158 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3159 %} 3160 ins_pipe( pipe_slow ); 3161 %} 3162 3163 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3164 predicate(n->as_Vector()->length() < 8); 3165 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3166 format %{ "vroundpd $dst, $mem, $rmode\t! 
round packedD" %} 3167 ins_encode %{ 3168 assert(UseAVX > 0, "required"); 3169 int vector_len = vector_length_encoding(this); 3170 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len); 3171 %} 3172 ins_pipe( pipe_slow ); 3173 %} 3174 3175 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3176 predicate(n->as_Vector()->length() == 8); 3177 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3178 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3179 ins_encode %{ 3180 assert(UseAVX > 2, "required"); 3181 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3182 %} 3183 ins_pipe( pipe_slow ); 3184 %} 3185 #endif // _LP64 3186 3187 instruct onspinwait() %{ 3188 match(OnSpinWait); 3189 ins_cost(200); 3190 3191 format %{ 3192 $$template 3193 $$emit$$"pause\t! membar_onspinwait" 3194 %} 3195 ins_encode %{ 3196 __ pause(); 3197 %} 3198 ins_pipe(pipe_slow); 3199 %} 3200 3201 // a * b + c 3202 instruct fmaD_reg(regD a, regD b, regD c) %{ 3203 predicate(UseFMA); 3204 match(Set c (FmaD c (Binary a b))); 3205 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3206 ins_cost(150); 3207 ins_encode %{ 3208 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3209 %} 3210 ins_pipe( pipe_slow ); 3211 %} 3212 3213 // a * b + c 3214 instruct fmaF_reg(regF a, regF b, regF c) %{ 3215 predicate(UseFMA); 3216 match(Set c (FmaF c (Binary a b))); 3217 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3218 ins_cost(150); 3219 ins_encode %{ 3220 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3221 %} 3222 ins_pipe( pipe_slow ); 3223 %} 3224 3225 // ====================VECTOR INSTRUCTIONS===================================== 3226 3227 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3228 instruct MoveVec2Leg(legVec dst, vec src) %{ 3229 match(Set dst src); 3230 format %{ "" %} 3231 ins_encode %{ 3232 ShouldNotReachHere(); 3233 %} 3234 ins_pipe( fpu_reg_reg ); 3235 %} 3236 3237 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3238 match(Set dst src); 3239 format %{ "" %} 3240 ins_encode %{ 3241 ShouldNotReachHere(); 3242 %} 3243 ins_pipe( fpu_reg_reg ); 3244 %} 3245 3246 // ============================================================================ 3247 3248 // Load vectors 3249 instruct loadV(vec dst, memory mem) %{ 3250 match(Set dst (LoadVector mem)); 3251 ins_cost(125); 3252 format %{ "load_vector $dst,$mem" %} 3253 ins_encode %{ 3254 switch (vector_length_in_bytes(this)) { 3255 case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break; 3256 case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break; 3257 case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break; 3258 case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break; 3259 case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break; 3260 default: ShouldNotReachHere(); 3261 } 3262 %} 3263 ins_pipe( pipe_slow ); 3264 %} 3265 3266 // Store vectors generic operand pattern. 
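// Like loadV above, the store form is chosen purely by the vector size in bytes:
// 4 -> movdl, 8 -> movq, 16 -> movdqu, 32 -> vmovdqu, 64 -> evmovdqul.
// Illustrative result for a simple int[] copy loop vectorized to 32-byte vectors
// (assuming AVX2 is available and SuperWord picks a 256-bit width):
//   vmovdqu ymm0, [src]    // 32-byte loadV
//   vmovdqu [dst], ymm0    // 32-byte storeV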
3267 instruct storeV(memory mem, vec src) %{ 3268 match(Set mem (StoreVector mem src)); 3269 ins_cost(145); 3270 format %{ "store_vector $mem,$src\n\t" %} 3271 ins_encode %{ 3272 switch (vector_length_in_bytes(this, $src)) { 3273 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3274 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3275 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3276 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3277 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3278 default: ShouldNotReachHere(); 3279 } 3280 %} 3281 ins_pipe( pipe_slow ); 3282 %} 3283 3284 // ====================REPLICATE======================================= 3285 3286 // Replicate byte scalar to be vector 3287 instruct ReplB_reg(vec dst, rRegI src) %{ 3288 match(Set dst (ReplicateB src)); 3289 format %{ "replicateB $dst,$src" %} 3290 ins_encode %{ 3291 uint vlen = vector_length(this); 3292 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3293 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 3294 int vlen_enc = vector_length_encoding(this); 3295 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 3296 } else { 3297 __ movdl($dst$$XMMRegister, $src$$Register); 3298 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3299 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3300 if (vlen >= 16) { 3301 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3302 if (vlen >= 32) { 3303 assert(vlen == 32, "sanity"); 3304 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3305 } 3306 } 3307 } 3308 %} 3309 ins_pipe( pipe_slow ); 3310 %} 3311 3312 instruct ReplB_mem(vec dst, memory mem) %{ 3313 predicate(VM_Version::supports_avx2()); 3314 match(Set dst (ReplicateB (LoadB mem))); 3315 format %{ "replicateB $dst,$mem" %} 3316 ins_encode %{ 3317 int vector_len = vector_length_encoding(this); 3318 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3319 %} 3320 ins_pipe( pipe_slow ); 3321 %} 3322 3323 instruct ReplB_imm(vec dst, immI con) %{ 3324 match(Set dst (ReplicateB con)); 3325 format %{ "replicateB $dst,$con" %} 3326 ins_encode %{ 3327 uint vlen = vector_length(this); 3328 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1)); 3329 if (vlen == 4) { 3330 __ movdl($dst$$XMMRegister, const_addr); 3331 } else { 3332 __ movq($dst$$XMMRegister, const_addr); 3333 if (vlen >= 16) { 3334 if (VM_Version::supports_avx2()) { 3335 int vlen_enc = vector_length_encoding(this); 3336 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3337 } else { 3338 assert(vlen == 16, "sanity"); 3339 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3340 } 3341 } 3342 } 3343 %} 3344 ins_pipe( pipe_slow ); 3345 %} 3346 3347 // Replicate byte scalar zero to be vector 3348 instruct ReplB_zero(vec dst, immI0 zero) %{ 3349 match(Set dst (ReplicateB zero)); 3350 format %{ "replicateB $dst,$zero" %} 3351 ins_encode %{ 3352 uint vlen = vector_length(this); 3353 if (vlen <= 16) { 3354 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3355 } else { 3356 // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ). 
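      // The vlen <= 16 cases above can use the legacy 128-bit pxor; here the VEX/EVEX
      // integer form is used because vpxor covers the 256-bit case with AVX2 and the
      // 512-bit case with plain AVX512F, whereas the FP forms (vxorps/vxorpd) only get
      // a 512-bit encoding with AVX512DQ.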
3357 int vlen_enc = vector_length_encoding(this); 3358 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3359 } 3360 %} 3361 ins_pipe( fpu_reg_reg ); 3362 %} 3363 3364 // ====================ReplicateS======================================= 3365 3366 instruct ReplS_reg(vec dst, rRegI src) %{ 3367 match(Set dst (ReplicateS src)); 3368 format %{ "replicateS $dst,$src" %} 3369 ins_encode %{ 3370 uint vlen = vector_length(this); 3371 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3372 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 3373 int vlen_enc = vector_length_encoding(this); 3374 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 3375 } else { 3376 __ movdl($dst$$XMMRegister, $src$$Register); 3377 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3378 if (vlen >= 8) { 3379 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3380 if (vlen >= 16) { 3381 assert(vlen == 16, "sanity"); 3382 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3383 } 3384 } 3385 } 3386 %} 3387 ins_pipe( pipe_slow ); 3388 %} 3389 3390 instruct ReplS_mem(vec dst, memory mem) %{ 3391 predicate(VM_Version::supports_avx2()); 3392 match(Set dst (ReplicateS (LoadS mem))); 3393 format %{ "replicateS $dst,$mem" %} 3394 ins_encode %{ 3395 int vlen_enc = vector_length_encoding(this); 3396 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 3397 %} 3398 ins_pipe( pipe_slow ); 3399 %} 3400 3401 instruct ReplS_imm(vec dst, immI con) %{ 3402 match(Set dst (ReplicateS con)); 3403 format %{ "replicateS $dst,$con" %} 3404 ins_encode %{ 3405 uint vlen = vector_length(this); 3406 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2)); 3407 if (vlen == 2) { 3408 __ movdl($dst$$XMMRegister, const_addr); 3409 } else { 3410 __ movq($dst$$XMMRegister, const_addr); 3411 if (vlen >= 8) { 3412 if (VM_Version::supports_avx2()) { 3413 int vlen_enc = vector_length_encoding(this); 3414 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3415 } else { 3416 assert(vlen == 8, "sanity"); 3417 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3418 } 3419 } 3420 } 3421 %} 3422 ins_pipe( fpu_reg_reg ); 3423 %} 3424 3425 instruct ReplS_zero(vec dst, immI0 zero) %{ 3426 match(Set dst (ReplicateS zero)); 3427 format %{ "replicateS $dst,$zero" %} 3428 ins_encode %{ 3429 uint vlen = vector_length(this); 3430 if (vlen <= 8) { 3431 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3432 } else { 3433 int vlen_enc = vector_length_encoding(this); 3434 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3435 } 3436 %} 3437 ins_pipe( fpu_reg_reg ); 3438 %} 3439 3440 // ====================ReplicateI======================================= 3441 3442 instruct ReplI_reg(vec dst, rRegI src) %{ 3443 match(Set dst (ReplicateI src)); 3444 format %{ "replicateI $dst,$src" %} 3445 ins_encode %{ 3446 uint vlen = vector_length(this); 3447 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3448 int vlen_enc = vector_length_encoding(this); 3449 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 3450 } else { 3451 __ movdl($dst$$XMMRegister, $src$$Register); 3452 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3453 if (vlen >= 8) { 3454 assert(vlen == 8, "sanity"); 3455 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3456 } 3457 } 3458 %} 3459 ins_pipe( pipe_slow ); 3460 %} 3461 3462 instruct 
ReplI_mem(vec dst, memory mem) %{ 3463 match(Set dst (ReplicateI (LoadI mem))); 3464 format %{ "replicateI $dst,$mem" %} 3465 ins_encode %{ 3466 uint vlen = vector_length(this); 3467 if (vlen <= 4) { 3468 __ movdl($dst$$XMMRegister, $mem$$Address); 3469 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3470 } else { 3471 assert(VM_Version::supports_avx2(), "sanity"); 3472 int vector_len = vector_length_encoding(this); 3473 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 3474 } 3475 %} 3476 ins_pipe( pipe_slow ); 3477 %} 3478 3479 instruct ReplI_imm(vec dst, immI con) %{ 3480 match(Set dst (ReplicateI con)); 3481 format %{ "replicateI $dst,$con" %} 3482 ins_encode %{ 3483 uint vlen = vector_length(this); 3484 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4)); 3485 if (vlen <= 4) { 3486 __ movq($dst$$XMMRegister, const_addr); 3487 if (vlen == 4) { 3488 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3489 } 3490 } else { 3491 assert(VM_Version::supports_avx2(), "sanity"); 3492 int vector_len = vector_length_encoding(this); 3493 __ movq($dst$$XMMRegister, const_addr); 3494 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3495 } 3496 %} 3497 ins_pipe( pipe_slow ); 3498 %} 3499 3500 // Replicate integer (4 byte) scalar zero to be vector 3501 instruct ReplI_zero(vec dst, immI0 zero) %{ 3502 match(Set dst (ReplicateI zero)); 3503 format %{ "replicateI $dst,$zero" %} 3504 ins_encode %{ 3505 uint vlen = vector_length(this); 3506 if (vlen <= 4) { 3507 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3508 } else { 3509 int vlen_enc = vector_length_encoding(this); 3510 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3511 } 3512 %} 3513 ins_pipe( fpu_reg_reg ); 3514 %} 3515 3516 instruct ReplI_M1(vec dst, immI_M1 con) %{ 3517 predicate(UseAVX > 0); 3518 match(Set dst (ReplicateB con)); 3519 match(Set dst (ReplicateS con)); 3520 match(Set dst (ReplicateI con)); 3521 effect(TEMP dst); 3522 format %{ "vallones $dst" %} 3523 ins_encode %{ 3524 int vector_len = vector_length_encoding(this); 3525 __ vallones($dst$$XMMRegister, vector_len); 3526 %} 3527 ins_pipe( pipe_slow ); 3528 %} 3529 3530 // ====================ReplicateL======================================= 3531 3532 #ifdef _LP64 3533 // Replicate long (8 byte) scalar to be vector 3534 instruct ReplL_reg(vec dst, rRegL src) %{ 3535 match(Set dst (ReplicateL src)); 3536 format %{ "replicateL $dst,$src" %} 3537 ins_encode %{ 3538 uint vlen = vector_length(this); 3539 if (vlen == 2) { 3540 __ movdq($dst$$XMMRegister, $src$$Register); 3541 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3542 } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3543 int vlen_enc = vector_length_encoding(this); 3544 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 3545 } else { 3546 assert(vlen == 4, "sanity"); 3547 __ movdq($dst$$XMMRegister, $src$$Register); 3548 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3549 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3550 } 3551 %} 3552 ins_pipe( pipe_slow ); 3553 %} 3554 #else // _LP64 3555 // Replicate long (8 byte) scalar to be vector 3556 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 3557 predicate(n->as_Vector()->length() <= 4); 3558 match(Set dst (ReplicateL src)); 3559 effect(TEMP dst, USE src, TEMP tmp); 3560 format %{ "replicateL $dst,$src" %} 3561 ins_encode %{ 3562 uint vlen = vector_length(this); 3563 if (vlen == 2) { 3564 __ 
movdl($dst$$XMMRegister, $src$$Register); 3565 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3566 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3567 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3568 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3569 int vector_len = Assembler::AVX_256bit; 3570 __ movdl($dst$$XMMRegister, $src$$Register); 3571 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3572 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3573 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3574 } else { 3575 __ movdl($dst$$XMMRegister, $src$$Register); 3576 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3577 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3578 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3579 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3580 } 3581 %} 3582 ins_pipe( pipe_slow ); 3583 %} 3584 3585 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 3586 predicate(n->as_Vector()->length() == 8); 3587 match(Set dst (ReplicateL src)); 3588 effect(TEMP dst, USE src, TEMP tmp); 3589 format %{ "replicateL $dst,$src" %} 3590 ins_encode %{ 3591 if (VM_Version::supports_avx512vl()) { 3592 __ movdl($dst$$XMMRegister, $src$$Register); 3593 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3594 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3595 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3596 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3597 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3598 } else { 3599 int vector_len = Assembler::AVX_512bit; 3600 __ movdl($dst$$XMMRegister, $src$$Register); 3601 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3602 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3603 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3604 } 3605 %} 3606 ins_pipe( pipe_slow ); 3607 %} 3608 #endif // _LP64 3609 3610 instruct ReplL_mem(vec dst, memory mem) %{ 3611 match(Set dst (ReplicateL (LoadL mem))); 3612 format %{ "replicateL $dst,$mem" %} 3613 ins_encode %{ 3614 uint vlen = vector_length(this); 3615 if (vlen == 2) { 3616 __ movq($dst$$XMMRegister, $mem$$Address); 3617 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3618 } else { 3619 assert(VM_Version::supports_avx2(), "sanity"); 3620 int vlen_enc = vector_length_encoding(this); 3621 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 3622 } 3623 %} 3624 ins_pipe( pipe_slow ); 3625 %} 3626 3627 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
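// The 64-bit immediate is placed in the constant table once; the 2-element case builds
// the vector with movq + punpcklqdq, and wider vectors broadcast the loaded quadword
// with vpbroadcastq (which needs AVX2 for the register form). Illustrative 4-element
// encoding:
//   movq         xmm0, [constant table entry]
//   vpbroadcastq ymm0, xmm0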
3628 instruct ReplL_imm(vec dst, immL con) %{ 3629 match(Set dst (ReplicateL con)); 3630 format %{ "replicateL $dst,$con" %} 3631 ins_encode %{ 3632 uint vlen = vector_length(this); 3633 InternalAddress const_addr = $constantaddress($con); 3634 if (vlen == 2) { 3635 __ movq($dst$$XMMRegister, const_addr); 3636 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3637 } else { 3638 assert(VM_Version::supports_avx2(), "sanity"); 3639 int vlen_enc = vector_length_encoding(this); 3640 __ movq($dst$$XMMRegister, const_addr); 3641 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3642 } 3643 %} 3644 ins_pipe( pipe_slow ); 3645 %} 3646 3647 instruct ReplL_zero(vec dst, immL0 zero) %{ 3648 match(Set dst (ReplicateL zero)); 3649 format %{ "replicateL $dst,$zero" %} 3650 ins_encode %{ 3651 int vlen = vector_length(this); 3652 if (vlen == 2) { 3653 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3654 } else { 3655 int vlen_enc = vector_length_encoding(this); 3656 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3657 } 3658 %} 3659 ins_pipe( fpu_reg_reg ); 3660 %} 3661 3662 instruct ReplL_M1(vec dst, immL_M1 con) %{ 3663 predicate(UseAVX > 0); 3664 match(Set dst (ReplicateL con)); 3665 effect(TEMP dst); 3666 format %{ "vallones $dst" %} 3667 ins_encode %{ 3668 int vector_len = vector_length_encoding(this); 3669 __ vallones($dst$$XMMRegister, vector_len); 3670 %} 3671 ins_pipe( pipe_slow ); 3672 %} 3673 3674 // ====================ReplicateF======================================= 3675 3676 instruct ReplF_reg(vec dst, vlRegF src) %{ 3677 match(Set dst (ReplicateF src)); 3678 format %{ "replicateF $dst,$src" %} 3679 ins_encode %{ 3680 uint vlen = vector_length(this); 3681 if (vlen <= 4) { 3682 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3683 } else if (VM_Version::supports_avx2()) { 3684 int vector_len = vector_length_encoding(this); 3685 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2 3686 } else { 3687 assert(vlen == 8, "sanity"); 3688 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3689 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3690 } 3691 %} 3692 ins_pipe( pipe_slow ); 3693 %} 3694 3695 instruct ReplF_mem(vec dst, memory mem) %{ 3696 match(Set dst (ReplicateF (LoadF mem))); 3697 format %{ "replicateF $dst,$mem" %} 3698 ins_encode %{ 3699 uint vlen = vector_length(this); 3700 if (vlen <= 4) { 3701 __ movdl($dst$$XMMRegister, $mem$$Address); 3702 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3703 } else { 3704 assert(VM_Version::supports_avx(), "sanity"); 3705 int vector_len = vector_length_encoding(this); 3706 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 3707 } 3708 %} 3709 ins_pipe( pipe_slow ); 3710 %} 3711 3712 instruct ReplF_zero(vec dst, immF0 zero) %{ 3713 match(Set dst (ReplicateF zero)); 3714 format %{ "replicateF $dst,$zero" %} 3715 ins_encode %{ 3716 uint vlen = vector_length(this); 3717 if (vlen <= 4) { 3718 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3719 } else { 3720 int vlen_enc = vector_length_encoding(this); 3721 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 3722 } 3723 %} 3724 ins_pipe( fpu_reg_reg ); 3725 %} 3726 3727 // ====================ReplicateD======================================= 3728 3729 // Replicate double (8 bytes) scalar to be vector 3730 instruct ReplD_reg(vec dst, vlRegD src) %{ 3731 match(Set dst (ReplicateD src)); 3732 format %{ "replicateD 
$dst,$src" %} 3733 ins_encode %{ 3734 uint vlen = vector_length(this); 3735 if (vlen == 2) { 3736 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3737 } else if (VM_Version::supports_avx2()) { 3738 int vector_len = vector_length_encoding(this); 3739 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2 3740 } else { 3741 assert(vlen == 4, "sanity"); 3742 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3743 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3744 } 3745 %} 3746 ins_pipe( pipe_slow ); 3747 %} 3748 3749 instruct ReplD_mem(vec dst, memory mem) %{ 3750 match(Set dst (ReplicateD (LoadD mem))); 3751 format %{ "replicateD $dst,$mem" %} 3752 ins_encode %{ 3753 uint vlen = vector_length(this); 3754 if (vlen == 2) { 3755 __ movq($dst$$XMMRegister, $mem$$Address); 3756 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); 3757 } else { 3758 assert(VM_Version::supports_avx(), "sanity"); 3759 int vector_len = vector_length_encoding(this); 3760 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 3761 } 3762 %} 3763 ins_pipe( pipe_slow ); 3764 %} 3765 3766 instruct ReplD_zero(vec dst, immD0 zero) %{ 3767 match(Set dst (ReplicateD zero)); 3768 format %{ "replicateD $dst,$zero" %} 3769 ins_encode %{ 3770 uint vlen = vector_length(this); 3771 if (vlen == 2) { 3772 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3773 } else { 3774 int vlen_enc = vector_length_encoding(this); 3775 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 3776 } 3777 %} 3778 ins_pipe( fpu_reg_reg ); 3779 %} 3780 3781 // ====================REDUCTION ARITHMETIC======================================= 3782 // =======================Int Reduction========================================== 3783 3784 instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 3785 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT && 3786 n->in(2)->bottom_type()->is_vect()->length() < 16); 3787 match(Set dst (AddReductionVI src1 src2)); 3788 match(Set dst (MulReductionVI src1 src2)); 3789 match(Set dst (AndReductionV src1 src2)); 3790 match(Set dst ( OrReductionV src1 src2)); 3791 match(Set dst (XorReductionV src1 src2)); 3792 effect(TEMP vtmp1, TEMP vtmp2); 3793 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3794 ins_encode %{ 3795 int opcode = this->ideal_Opcode(); 3796 int vlen = vector_length(this, $src2); 3797 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3798 %} 3799 ins_pipe( pipe_slow ); 3800 %} 3801 3802 instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 3803 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT && 3804 n->in(2)->bottom_type()->is_vect()->length() == 16); 3805 match(Set dst (AddReductionVI src1 src2)); 3806 match(Set dst (MulReductionVI src1 src2)); 3807 match(Set dst (AndReductionV src1 src2)); 3808 match(Set dst ( OrReductionV src1 src2)); 3809 match(Set dst (XorReductionV src1 src2)); 3810 effect(TEMP vtmp1, TEMP vtmp2); 3811 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3812 ins_encode %{ 3813 int opcode = this->ideal_Opcode(); 3814 int vlen = vector_length(this, $src2); 3815 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3816 %} 3817 ins_pipe( 
pipe_slow ); 3818 %} 3819 3820 // =======================Long Reduction========================================== 3821 3822 #ifdef _LP64 3823 instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 3824 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && 3825 n->in(2)->bottom_type()->is_vect()->length() < 8); 3826 match(Set dst (AddReductionVL src1 src2)); 3827 match(Set dst (MulReductionVL src1 src2)); 3828 match(Set dst (AndReductionV src1 src2)); 3829 match(Set dst ( OrReductionV src1 src2)); 3830 match(Set dst (XorReductionV src1 src2)); 3831 effect(TEMP vtmp1, TEMP vtmp2); 3832 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3833 ins_encode %{ 3834 int opcode = this->ideal_Opcode(); 3835 int vlen = vector_length(this, $src2); 3836 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3837 %} 3838 ins_pipe( pipe_slow ); 3839 %} 3840 3841 instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 3842 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && 3843 n->in(2)->bottom_type()->is_vect()->length() == 8); 3844 match(Set dst (AddReductionVL src1 src2)); 3845 match(Set dst (MulReductionVL src1 src2)); 3846 match(Set dst (AndReductionV src1 src2)); 3847 match(Set dst ( OrReductionV src1 src2)); 3848 match(Set dst (XorReductionV src1 src2)); 3849 effect(TEMP vtmp1, TEMP vtmp2); 3850 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3851 ins_encode %{ 3852 int opcode = this->ideal_Opcode(); 3853 int vlen = vector_length(this, $src2); 3854 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3855 %} 3856 ins_pipe( pipe_slow ); 3857 %} 3858 #endif // _LP64 3859 3860 // =======================Float Reduction========================================== 3861 3862 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 3863 predicate(n->in(2)->bottom_type()->is_vect()->length() <= 4); 3864 match(Set dst (AddReductionVF dst src)); 3865 match(Set dst (MulReductionVF dst src)); 3866 effect(TEMP dst, TEMP vtmp); 3867 format %{ "vector_reduction_fp $dst,$src ; using $vtmp as TEMP" %} 3868 ins_encode %{ 3869 int opcode = this->ideal_Opcode(); 3870 int vlen = vector_length(this, $src); 3871 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 3872 %} 3873 ins_pipe( pipe_slow ); 3874 %} 3875 3876 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 3877 predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); 3878 match(Set dst (AddReductionVF dst src)); 3879 match(Set dst (MulReductionVF dst src)); 3880 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3881 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3882 ins_encode %{ 3883 int opcode = this->ideal_Opcode(); 3884 int vlen = vector_length(this, $src); 3885 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3886 %} 3887 ins_pipe( pipe_slow ); 3888 %} 3889 3890 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 3891 predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); 3892 match(Set dst (AddReductionVF dst src)); 3893 match(Set dst (MulReductionVF dst src)); 3894 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3895 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" 
%} 3896 ins_encode %{ 3897 int opcode = this->ideal_Opcode(); 3898 int vlen = vector_length(this, $src); 3899 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3900 %} 3901 ins_pipe( pipe_slow ); 3902 %} 3903 3904 // =======================Double Reduction========================================== 3905 3906 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 3907 predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); 3908 match(Set dst (AddReductionVD dst src)); 3909 match(Set dst (MulReductionVD dst src)); 3910 effect(TEMP dst, TEMP vtmp); 3911 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 3912 ins_encode %{ 3913 int opcode = this->ideal_Opcode(); 3914 int vlen = vector_length(this, $src); 3915 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 3916 %} 3917 ins_pipe( pipe_slow ); 3918 %} 3919 3920 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 3921 predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); 3922 match(Set dst (AddReductionVD dst src)); 3923 match(Set dst (MulReductionVD dst src)); 3924 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3925 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3926 ins_encode %{ 3927 int opcode = this->ideal_Opcode(); 3928 int vlen = vector_length(this, $src); 3929 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3930 %} 3931 ins_pipe( pipe_slow ); 3932 %} 3933 3934 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 3935 predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); 3936 match(Set dst (AddReductionVD dst src)); 3937 match(Set dst (MulReductionVD dst src)); 3938 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3939 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3940 ins_encode %{ 3941 int opcode = this->ideal_Opcode(); 3942 int vlen = vector_length(this, $src); 3943 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3944 %} 3945 ins_pipe( pipe_slow ); 3946 %} 3947 3948 // ====================VECTOR ARITHMETIC======================================= 3949 3950 // --------------------------------- ADD -------------------------------------- 3951 3952 // Bytes vector add 3953 instruct vaddB(vec dst, vec src) %{ 3954 predicate(UseAVX == 0); 3955 match(Set dst (AddVB dst src)); 3956 format %{ "paddb $dst,$src\t! add packedB" %} 3957 ins_encode %{ 3958 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3959 %} 3960 ins_pipe( pipe_slow ); 3961 %} 3962 3963 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 3964 predicate(UseAVX > 0); 3965 match(Set dst (AddVB src1 src2)); 3966 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 3967 ins_encode %{ 3968 int vector_len = vector_length_encoding(this); 3969 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3970 %} 3971 ins_pipe( pipe_slow ); 3972 %} 3973 3974 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 3975 predicate(UseAVX > 0); 3976 match(Set dst (AddVB src (LoadVector mem))); 3977 format %{ "vpaddb $dst,$src,$mem\t! 
add packedB" %} 3978 ins_encode %{ 3979 int vector_len = vector_length_encoding(this); 3980 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3981 %} 3982 ins_pipe( pipe_slow ); 3983 %} 3984 3985 // Shorts/Chars vector add 3986 instruct vaddS(vec dst, vec src) %{ 3987 predicate(UseAVX == 0); 3988 match(Set dst (AddVS dst src)); 3989 format %{ "paddw $dst,$src\t! add packedS" %} 3990 ins_encode %{ 3991 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 3992 %} 3993 ins_pipe( pipe_slow ); 3994 %} 3995 3996 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 3997 predicate(UseAVX > 0); 3998 match(Set dst (AddVS src1 src2)); 3999 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 4000 ins_encode %{ 4001 int vector_len = vector_length_encoding(this); 4002 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4003 %} 4004 ins_pipe( pipe_slow ); 4005 %} 4006 4007 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 4008 predicate(UseAVX > 0); 4009 match(Set dst (AddVS src (LoadVector mem))); 4010 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 4011 ins_encode %{ 4012 int vector_len = vector_length_encoding(this); 4013 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4014 %} 4015 ins_pipe( pipe_slow ); 4016 %} 4017 4018 // Integers vector add 4019 instruct vaddI(vec dst, vec src) %{ 4020 predicate(UseAVX == 0); 4021 match(Set dst (AddVI dst src)); 4022 format %{ "paddd $dst,$src\t! add packedI" %} 4023 ins_encode %{ 4024 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 4025 %} 4026 ins_pipe( pipe_slow ); 4027 %} 4028 4029 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 4030 predicate(UseAVX > 0); 4031 match(Set dst (AddVI src1 src2)); 4032 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 4033 ins_encode %{ 4034 int vector_len = vector_length_encoding(this); 4035 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4036 %} 4037 ins_pipe( pipe_slow ); 4038 %} 4039 4040 4041 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 4042 predicate(UseAVX > 0); 4043 match(Set dst (AddVI src (LoadVector mem))); 4044 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 4045 ins_encode %{ 4046 int vector_len = vector_length_encoding(this); 4047 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4048 %} 4049 ins_pipe( pipe_slow ); 4050 %} 4051 4052 // Longs vector add 4053 instruct vaddL(vec dst, vec src) %{ 4054 predicate(UseAVX == 0); 4055 match(Set dst (AddVL dst src)); 4056 format %{ "paddq $dst,$src\t! add packedL" %} 4057 ins_encode %{ 4058 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 4059 %} 4060 ins_pipe( pipe_slow ); 4061 %} 4062 4063 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 4064 predicate(UseAVX > 0); 4065 match(Set dst (AddVL src1 src2)); 4066 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 4067 ins_encode %{ 4068 int vector_len = vector_length_encoding(this); 4069 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4070 %} 4071 ins_pipe( pipe_slow ); 4072 %} 4073 4074 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 4075 predicate(UseAVX > 0); 4076 match(Set dst (AddVL src (LoadVector mem))); 4077 format %{ "vpaddq $dst,$src,$mem\t! 
add packedL" %} 4078 ins_encode %{ 4079 int vector_len = vector_length_encoding(this); 4080 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4081 %} 4082 ins_pipe( pipe_slow ); 4083 %} 4084 4085 // Floats vector add 4086 instruct vaddF(vec dst, vec src) %{ 4087 predicate(UseAVX == 0); 4088 match(Set dst (AddVF dst src)); 4089 format %{ "addps $dst,$src\t! add packedF" %} 4090 ins_encode %{ 4091 __ addps($dst$$XMMRegister, $src$$XMMRegister); 4092 %} 4093 ins_pipe( pipe_slow ); 4094 %} 4095 4096 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 4097 predicate(UseAVX > 0); 4098 match(Set dst (AddVF src1 src2)); 4099 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 4100 ins_encode %{ 4101 int vector_len = vector_length_encoding(this); 4102 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4103 %} 4104 ins_pipe( pipe_slow ); 4105 %} 4106 4107 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 4108 predicate(UseAVX > 0); 4109 match(Set dst (AddVF src (LoadVector mem))); 4110 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 4111 ins_encode %{ 4112 int vector_len = vector_length_encoding(this); 4113 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4114 %} 4115 ins_pipe( pipe_slow ); 4116 %} 4117 4118 // Doubles vector add 4119 instruct vaddD(vec dst, vec src) %{ 4120 predicate(UseAVX == 0); 4121 match(Set dst (AddVD dst src)); 4122 format %{ "addpd $dst,$src\t! add packedD" %} 4123 ins_encode %{ 4124 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 4125 %} 4126 ins_pipe( pipe_slow ); 4127 %} 4128 4129 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 4130 predicate(UseAVX > 0); 4131 match(Set dst (AddVD src1 src2)); 4132 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 4133 ins_encode %{ 4134 int vector_len = vector_length_encoding(this); 4135 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4136 %} 4137 ins_pipe( pipe_slow ); 4138 %} 4139 4140 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 4141 predicate(UseAVX > 0); 4142 match(Set dst (AddVD src (LoadVector mem))); 4143 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 4144 ins_encode %{ 4145 int vector_len = vector_length_encoding(this); 4146 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4147 %} 4148 ins_pipe( pipe_slow ); 4149 %} 4150 4151 // --------------------------------- SUB -------------------------------------- 4152 4153 // Bytes vector sub 4154 instruct vsubB(vec dst, vec src) %{ 4155 predicate(UseAVX == 0); 4156 match(Set dst (SubVB dst src)); 4157 format %{ "psubb $dst,$src\t! sub packedB" %} 4158 ins_encode %{ 4159 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 4160 %} 4161 ins_pipe( pipe_slow ); 4162 %} 4163 4164 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 4165 predicate(UseAVX > 0); 4166 match(Set dst (SubVB src1 src2)); 4167 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 4168 ins_encode %{ 4169 int vector_len = vector_length_encoding(this); 4170 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4171 %} 4172 ins_pipe( pipe_slow ); 4173 %} 4174 4175 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 4176 predicate(UseAVX > 0); 4177 match(Set dst (SubVB src (LoadVector mem))); 4178 format %{ "vpsubb $dst,$src,$mem\t! 
sub packedB" %} 4179 ins_encode %{ 4180 int vector_len = vector_length_encoding(this); 4181 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4182 %} 4183 ins_pipe( pipe_slow ); 4184 %} 4185 4186 // Shorts/Chars vector sub 4187 instruct vsubS(vec dst, vec src) %{ 4188 predicate(UseAVX == 0); 4189 match(Set dst (SubVS dst src)); 4190 format %{ "psubw $dst,$src\t! sub packedS" %} 4191 ins_encode %{ 4192 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 4193 %} 4194 ins_pipe( pipe_slow ); 4195 %} 4196 4197 4198 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 4199 predicate(UseAVX > 0); 4200 match(Set dst (SubVS src1 src2)); 4201 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 4202 ins_encode %{ 4203 int vector_len = vector_length_encoding(this); 4204 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4205 %} 4206 ins_pipe( pipe_slow ); 4207 %} 4208 4209 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 4210 predicate(UseAVX > 0); 4211 match(Set dst (SubVS src (LoadVector mem))); 4212 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 4213 ins_encode %{ 4214 int vector_len = vector_length_encoding(this); 4215 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4216 %} 4217 ins_pipe( pipe_slow ); 4218 %} 4219 4220 // Integers vector sub 4221 instruct vsubI(vec dst, vec src) %{ 4222 predicate(UseAVX == 0); 4223 match(Set dst (SubVI dst src)); 4224 format %{ "psubd $dst,$src\t! sub packedI" %} 4225 ins_encode %{ 4226 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 4227 %} 4228 ins_pipe( pipe_slow ); 4229 %} 4230 4231 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 4232 predicate(UseAVX > 0); 4233 match(Set dst (SubVI src1 src2)); 4234 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 4235 ins_encode %{ 4236 int vector_len = vector_length_encoding(this); 4237 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4238 %} 4239 ins_pipe( pipe_slow ); 4240 %} 4241 4242 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 4243 predicate(UseAVX > 0); 4244 match(Set dst (SubVI src (LoadVector mem))); 4245 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 4246 ins_encode %{ 4247 int vector_len = vector_length_encoding(this); 4248 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4249 %} 4250 ins_pipe( pipe_slow ); 4251 %} 4252 4253 // Longs vector sub 4254 instruct vsubL(vec dst, vec src) %{ 4255 predicate(UseAVX == 0); 4256 match(Set dst (SubVL dst src)); 4257 format %{ "psubq $dst,$src\t! sub packedL" %} 4258 ins_encode %{ 4259 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 4260 %} 4261 ins_pipe( pipe_slow ); 4262 %} 4263 4264 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 4265 predicate(UseAVX > 0); 4266 match(Set dst (SubVL src1 src2)); 4267 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 4268 ins_encode %{ 4269 int vector_len = vector_length_encoding(this); 4270 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4271 %} 4272 ins_pipe( pipe_slow ); 4273 %} 4274 4275 4276 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 4277 predicate(UseAVX > 0); 4278 match(Set dst (SubVL src (LoadVector mem))); 4279 format %{ "vpsubq $dst,$src,$mem\t! 
sub packedL" %} 4280 ins_encode %{ 4281 int vector_len = vector_length_encoding(this); 4282 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4283 %} 4284 ins_pipe( pipe_slow ); 4285 %} 4286 4287 // Floats vector sub 4288 instruct vsubF(vec dst, vec src) %{ 4289 predicate(UseAVX == 0); 4290 match(Set dst (SubVF dst src)); 4291 format %{ "subps $dst,$src\t! sub packedF" %} 4292 ins_encode %{ 4293 __ subps($dst$$XMMRegister, $src$$XMMRegister); 4294 %} 4295 ins_pipe( pipe_slow ); 4296 %} 4297 4298 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 4299 predicate(UseAVX > 0); 4300 match(Set dst (SubVF src1 src2)); 4301 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 4302 ins_encode %{ 4303 int vector_len = vector_length_encoding(this); 4304 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4305 %} 4306 ins_pipe( pipe_slow ); 4307 %} 4308 4309 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 4310 predicate(UseAVX > 0); 4311 match(Set dst (SubVF src (LoadVector mem))); 4312 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 4313 ins_encode %{ 4314 int vector_len = vector_length_encoding(this); 4315 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4316 %} 4317 ins_pipe( pipe_slow ); 4318 %} 4319 4320 // Doubles vector sub 4321 instruct vsubD(vec dst, vec src) %{ 4322 predicate(UseAVX == 0); 4323 match(Set dst (SubVD dst src)); 4324 format %{ "subpd $dst,$src\t! sub packedD" %} 4325 ins_encode %{ 4326 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 4327 %} 4328 ins_pipe( pipe_slow ); 4329 %} 4330 4331 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 4332 predicate(UseAVX > 0); 4333 match(Set dst (SubVD src1 src2)); 4334 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 4335 ins_encode %{ 4336 int vector_len = vector_length_encoding(this); 4337 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4338 %} 4339 ins_pipe( pipe_slow ); 4340 %} 4341 4342 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 4343 predicate(UseAVX > 0); 4344 match(Set dst (SubVD src (LoadVector mem))); 4345 format %{ "vsubpd $dst,$src,$mem\t! 
sub packedD" %} 4346 ins_encode %{ 4347 int vector_len = vector_length_encoding(this); 4348 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4349 %} 4350 ins_pipe( pipe_slow ); 4351 %} 4352 4353 // --------------------------------- MUL -------------------------------------- 4354 4355 // Byte vector mul 4356 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 4357 predicate(n->as_Vector()->length() == 4 || 4358 n->as_Vector()->length() == 8); 4359 match(Set dst (MulVB src1 src2)); 4360 effect(TEMP dst, TEMP tmp, TEMP scratch); 4361 format %{"vector_mulB $dst,$src1,$src2" %} 4362 ins_encode %{ 4363 assert(UseSSE > 3, "required"); 4364 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 4365 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 4366 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 4367 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4368 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 4369 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 4370 %} 4371 ins_pipe( pipe_slow ); 4372 %} 4373 4374 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4375 predicate(n->as_Vector()->length() == 16 && UseAVX <= 1); 4376 match(Set dst (MulVB src1 src2)); 4377 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4378 format %{"vector_mulB $dst,$src1,$src2" %} 4379 ins_encode %{ 4380 assert(UseSSE > 3, "required"); 4381 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 4382 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 4383 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 4384 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 4385 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 4386 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4387 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 4388 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 4389 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4390 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 4391 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 4392 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 4393 %} 4394 ins_pipe( pipe_slow ); 4395 %} 4396 4397 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 4398 predicate(n->as_Vector()->length() == 16 && UseAVX > 1); 4399 match(Set dst (MulVB src1 src2)); 4400 effect(TEMP dst, TEMP tmp, TEMP scratch); 4401 format %{"vector_mulB $dst,$src1,$src2" %} 4402 ins_encode %{ 4403 int vector_len = Assembler::AVX_256bit; 4404 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 4405 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4406 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); 4407 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4408 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 4409 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 4410 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 4411 %} 4412 ins_pipe( pipe_slow ); 4413 %} 4414 4415 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4416 predicate(n->as_Vector()->length() == 32); 4417 match(Set dst (MulVB src1 src2)); 4418 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4419 format %{"vector_mulB $dst,$src1,$src2" %} 4420 ins_encode %{ 4421 assert(UseAVX > 1, "required"); 4422 int 
vector_len = Assembler::AVX_256bit; 4423 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 4424 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 4425 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4426 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4427 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4428 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 4429 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4430 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4431 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4432 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4433 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4434 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4435 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4436 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 4437 %} 4438 ins_pipe( pipe_slow ); 4439 %} 4440 4441 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4442 predicate(n->as_Vector()->length() == 64); 4443 match(Set dst (MulVB src1 src2)); 4444 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4445 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 4446 ins_encode %{ 4447 assert(UseAVX > 2, "required"); 4448 int vector_len = Assembler::AVX_512bit; 4449 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 4450 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 4451 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4452 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4453 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4454 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 4455 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4456 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4457 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4458 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4459 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4460 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4461 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4462 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 4463 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4464 %} 4465 ins_pipe( pipe_slow ); 4466 %} 4467 4468 // Shorts/Chars vector mul 4469 instruct vmulS(vec dst, vec src) %{ 4470 predicate(UseAVX == 0); 4471 match(Set dst (MulVS dst src)); 4472 format %{ "pmullw $dst,$src\t! mul packedS" %} 4473 ins_encode %{ 4474 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4475 %} 4476 ins_pipe( pipe_slow ); 4477 %} 4478 4479 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 4480 predicate(UseAVX > 0); 4481 match(Set dst (MulVS src1 src2)); 4482 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packedS" %} 4483 ins_encode %{ 4484 int vector_len = vector_length_encoding(this); 4485 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4486 %} 4487 ins_pipe( pipe_slow ); 4488 %} 4489 4490 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 4491 predicate(UseAVX > 0); 4492 match(Set dst (MulVS src (LoadVector mem))); 4493 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 4494 ins_encode %{ 4495 int vector_len = vector_length_encoding(this); 4496 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4497 %} 4498 ins_pipe( pipe_slow ); 4499 %} 4500 4501 // Integers vector mul 4502 instruct vmulI(vec dst, vec src) %{ 4503 predicate(UseAVX == 0); 4504 match(Set dst (MulVI dst src)); 4505 format %{ "pmulld $dst,$src\t! mul packedI" %} 4506 ins_encode %{ 4507 assert(UseSSE > 3, "required"); 4508 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 4509 %} 4510 ins_pipe( pipe_slow ); 4511 %} 4512 4513 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 4514 predicate(UseAVX > 0); 4515 match(Set dst (MulVI src1 src2)); 4516 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 4517 ins_encode %{ 4518 int vector_len = vector_length_encoding(this); 4519 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4520 %} 4521 ins_pipe( pipe_slow ); 4522 %} 4523 4524 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 4525 predicate(UseAVX > 0); 4526 match(Set dst (MulVI src (LoadVector mem))); 4527 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 4528 ins_encode %{ 4529 int vector_len = vector_length_encoding(this); 4530 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4531 %} 4532 ins_pipe( pipe_slow ); 4533 %} 4534 4535 // Longs vector mul 4536 instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 4537 match(Set dst (MulVL src1 src2)); 4538 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 4539 ins_encode %{ 4540 assert(UseAVX > 2, "required"); 4541 int vector_len = vector_length_encoding(this); 4542 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4543 %} 4544 ins_pipe( pipe_slow ); 4545 %} 4546 4547 instruct vmulL_mem(vec dst, vec src, memory mem) %{ 4548 match(Set dst (MulVL src (LoadVector mem))); 4549 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 4550 ins_encode %{ 4551 assert(UseAVX > 2, "required"); 4552 int vector_len = vector_length_encoding(this); 4553 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4554 %} 4555 ins_pipe( pipe_slow ); 4556 %} 4557 4558 // Floats vector mul 4559 instruct vmulF(vec dst, vec src) %{ 4560 predicate(UseAVX == 0); 4561 match(Set dst (MulVF dst src)); 4562 format %{ "mulps $dst,$src\t! mul packedF" %} 4563 ins_encode %{ 4564 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 4565 %} 4566 ins_pipe( pipe_slow ); 4567 %} 4568 4569 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 4570 predicate(UseAVX > 0); 4571 match(Set dst (MulVF src1 src2)); 4572 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 4573 ins_encode %{ 4574 int vector_len = vector_length_encoding(this); 4575 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4576 %} 4577 ins_pipe( pipe_slow ); 4578 %} 4579 4580 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 4581 predicate(UseAVX > 0); 4582 match(Set dst (MulVF src (LoadVector mem))); 4583 format %{ "vmulps $dst,$src,$mem\t! 
mul packedF" %} 4584 ins_encode %{ 4585 int vector_len = vector_length_encoding(this); 4586 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4587 %} 4588 ins_pipe( pipe_slow ); 4589 %} 4590 4591 // Doubles vector mul 4592 instruct vmulD(vec dst, vec src) %{ 4593 predicate(UseAVX == 0); 4594 match(Set dst (MulVD dst src)); 4595 format %{ "mulpd $dst,$src\t! mul packedD" %} 4596 ins_encode %{ 4597 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 4598 %} 4599 ins_pipe( pipe_slow ); 4600 %} 4601 4602 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 4603 predicate(UseAVX > 0); 4604 match(Set dst (MulVD src1 src2)); 4605 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 4606 ins_encode %{ 4607 int vector_len = vector_length_encoding(this); 4608 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4609 %} 4610 ins_pipe( pipe_slow ); 4611 %} 4612 4613 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 4614 predicate(UseAVX > 0); 4615 match(Set dst (MulVD src (LoadVector mem))); 4616 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 4617 ins_encode %{ 4618 int vector_len = vector_length_encoding(this); 4619 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4620 %} 4621 ins_pipe( pipe_slow ); 4622 %} 4623 4624 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 4625 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4626 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 4627 effect(TEMP dst, USE src1, USE src2); 4628 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 4629 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 4630 %} 4631 ins_encode %{ 4632 int vector_len = 1; 4633 int cond = (Assembler::Condition)($copnd$$cmpcode); 4634 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 4635 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 4636 %} 4637 ins_pipe( pipe_slow ); 4638 %} 4639 4640 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 4641 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4642 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 4643 effect(TEMP dst, USE src1, USE src2); 4644 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 4645 "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 4646 %} 4647 ins_encode %{ 4648 int vector_len = 1; 4649 int cond = (Assembler::Condition)($copnd$$cmpcode); 4650 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 4651 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 4652 %} 4653 ins_pipe( pipe_slow ); 4654 %} 4655 4656 // --------------------------------- DIV -------------------------------------- 4657 4658 // Floats vector div 4659 instruct vdivF(vec dst, vec src) %{ 4660 predicate(UseAVX == 0); 4661 match(Set dst (DivVF dst src)); 4662 format %{ "divps $dst,$src\t! div packedF" %} 4663 ins_encode %{ 4664 __ divps($dst$$XMMRegister, $src$$XMMRegister); 4665 %} 4666 ins_pipe( pipe_slow ); 4667 %} 4668 4669 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 4670 predicate(UseAVX > 0); 4671 match(Set dst (DivVF src1 src2)); 4672 format %{ "vdivps $dst,$src1,$src2\t! 
div packedF" %} 4673 ins_encode %{ 4674 int vector_len = vector_length_encoding(this); 4675 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4676 %} 4677 ins_pipe( pipe_slow ); 4678 %} 4679 4680 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 4681 predicate(UseAVX > 0); 4682 match(Set dst (DivVF src (LoadVector mem))); 4683 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 4684 ins_encode %{ 4685 int vector_len = vector_length_encoding(this); 4686 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4687 %} 4688 ins_pipe( pipe_slow ); 4689 %} 4690 4691 // Doubles vector div 4692 instruct vdivD(vec dst, vec src) %{ 4693 predicate(UseAVX == 0); 4694 match(Set dst (DivVD dst src)); 4695 format %{ "divpd $dst,$src\t! div packedD" %} 4696 ins_encode %{ 4697 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 4698 %} 4699 ins_pipe( pipe_slow ); 4700 %} 4701 4702 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 4703 predicate(UseAVX > 0); 4704 match(Set dst (DivVD src1 src2)); 4705 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 4706 ins_encode %{ 4707 int vector_len = vector_length_encoding(this); 4708 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4709 %} 4710 ins_pipe( pipe_slow ); 4711 %} 4712 4713 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 4714 predicate(UseAVX > 0); 4715 match(Set dst (DivVD src (LoadVector mem))); 4716 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 4717 ins_encode %{ 4718 int vector_len = vector_length_encoding(this); 4719 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4720 %} 4721 ins_pipe( pipe_slow ); 4722 %} 4723 4724 // --------------------------------- Sqrt -------------------------------------- 4725 4726 instruct vsqrtF_reg(vec dst, vec src) %{ 4727 match(Set dst (SqrtVF src)); 4728 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 4729 ins_encode %{ 4730 assert(UseAVX > 0, "required"); 4731 int vector_len = vector_length_encoding(this); 4732 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4733 %} 4734 ins_pipe( pipe_slow ); 4735 %} 4736 4737 instruct vsqrtF_mem(vec dst, memory mem) %{ 4738 match(Set dst (SqrtVF (LoadVector mem))); 4739 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 4740 ins_encode %{ 4741 assert(UseAVX > 0, "required"); 4742 int vector_len = vector_length_encoding(this); 4743 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 4744 %} 4745 ins_pipe( pipe_slow ); 4746 %} 4747 4748 // Floating point vector sqrt 4749 instruct vsqrtD_reg(vec dst, vec src) %{ 4750 match(Set dst (SqrtVD src)); 4751 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 4752 ins_encode %{ 4753 assert(UseAVX > 0, "required"); 4754 int vector_len = vector_length_encoding(this); 4755 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4756 %} 4757 ins_pipe( pipe_slow ); 4758 %} 4759 4760 instruct vsqrtD_mem(vec dst, memory mem) %{ 4761 match(Set dst (SqrtVD (LoadVector mem))); 4762 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 4763 ins_encode %{ 4764 assert(UseAVX > 0, "required"); 4765 int vector_len = vector_length_encoding(this); 4766 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 4767 %} 4768 ins_pipe( pipe_slow ); 4769 %} 4770 4771 // ------------------------------ Shift --------------------------------------- 4772 4773 // Left and right shift count vectors are the same on x86 4774 // (only lowest bits of xmm reg are used for count). 
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Byte vector shift
instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() <= 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();

    __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_256bit;
    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts the short value into an int
// with sign extension before the shift. But char vectors are fine since chars
// are unsigned values.
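// Illustrative example: for a short lane holding 0x8000 (-32768) shifted right
// logically by 1, Java first sign-extends to the int 0xFFFF8000 and computes
// 0x7FFFC000, whose low 16 bits are 0xC000; a 16-bit psrlw would instead
// produce 0x4000. Char lanes are zero-extended, so both results agree there.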
shift packedS" %} 4910 ins_encode %{ 4911 int opcode = this->ideal_Opcode(); 4912 if (UseAVX > 0) { 4913 int vlen_enc = vector_length_encoding(this); 4914 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 4915 } else { 4916 int vlen = vector_length(this); 4917 if (vlen == 2) { 4918 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 4919 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4920 } else if (vlen == 4) { 4921 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 4922 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4923 } else { 4924 assert (vlen == 8, "sanity"); 4925 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 4926 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4927 } 4928 } 4929 %} 4930 ins_pipe( pipe_slow ); 4931 %} 4932 4933 // Integers vector left shift 4934 instruct vshiftI(vec dst, vec src, vec shift) %{ 4935 match(Set dst (LShiftVI src shift)); 4936 match(Set dst (RShiftVI src shift)); 4937 match(Set dst (URShiftVI src shift)); 4938 effect(TEMP dst, USE src, USE shift); 4939 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 4940 ins_encode %{ 4941 int opcode = this->ideal_Opcode(); 4942 if (UseAVX > 0) { 4943 int vector_len = vector_length_encoding(this); 4944 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 4945 } else { 4946 int vlen = vector_length(this); 4947 if (vlen == 2) { 4948 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 4949 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4950 } else { 4951 assert(vlen == 4, "sanity"); 4952 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 4953 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4954 } 4955 } 4956 %} 4957 ins_pipe( pipe_slow ); 4958 %} 4959 4960 // Integers vector left constant shift 4961 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 4962 match(Set dst (LShiftVI src (LShiftCntV shift))); 4963 match(Set dst (RShiftVI src (RShiftCntV shift))); 4964 match(Set dst (URShiftVI src (RShiftCntV shift))); 4965 effect(TEMP dst, USE src); 4966 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 4967 ins_encode %{ 4968 int opcode = this->ideal_Opcode(); 4969 if (UseAVX > 0) { 4970 int vector_len = vector_length_encoding(this); 4971 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 4972 } else { 4973 int vlen = vector_length(this); 4974 if (vlen == 2) { 4975 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 4976 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 4977 } else { 4978 assert(vlen == 4, "sanity"); 4979 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 4980 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 4981 } 4982 } 4983 %} 4984 ins_pipe( pipe_slow ); 4985 %} 4986 4987 // Longs vector shift 4988 instruct vshiftL(vec dst, vec src, vec shift) %{ 4989 match(Set dst (LShiftVL src shift)); 4990 match(Set dst (URShiftVL src shift)); 4991 effect(TEMP dst, USE src, USE shift); 4992 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 4993 ins_encode %{ 4994 int opcode = this->ideal_Opcode(); 4995 if (UseAVX > 0) { 4996 int vector_len = vector_length_encoding(this); 4997 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 4998 } else { 4999 assert(vector_length(this) == 2, ""); 5000 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5001 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 5002 } 5003 %} 5004 ins_pipe( pipe_slow ); 5005 %} 5006 5007 // Longs vector constant shift 5008 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 5009 match(Set dst (LShiftVL src (LShiftCntV shift))); 5010 match(Set dst (URShiftVL src (RShiftCntV shift))); 5011 effect(TEMP dst, USE src, USE shift); 5012 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 5013 ins_encode %{ 5014 int opcode = this->ideal_Opcode(); 5015 if (UseAVX > 0) { 5016 int vector_len = vector_length_encoding(this); 5017 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 5018 } else { 5019 assert(vector_length(this) == 2, ""); 5020 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5021 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 5022 } 5023 %} 5024 ins_pipe( pipe_slow ); 5025 %} 5026 5027 // -------------------ArithmeticRightShift ----------------------------------- 5028 // Long vector arithmetic right shift 5029 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5030 predicate(UseAVX <= 2); 5031 match(Set dst (RShiftVL src shift)); 5032 effect(TEMP dst, TEMP tmp, TEMP scratch); 5033 format %{ "vshiftq $dst,$src,$shift" %} 5034 ins_encode %{ 5035 uint vlen = vector_length(this); 5036 if (vlen == 2) { 5037 assert(UseSSE >= 2, "required"); 5038 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5039 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 5040 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 5041 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 5042 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 5043 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 5044 } else { 5045 assert(vlen == 4, "sanity"); 5046 assert(UseAVX > 1, "required"); 5047 int vector_len = Assembler::AVX_256bit; 5048 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 5049 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 5050 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 5051 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 5052 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 5053 } 5054 %} 5055 ins_pipe( pipe_slow ); 5056 %} 5057 5058 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 5059 predicate(UseAVX > 2); 5060 match(Set dst (RShiftVL src shift)); 5061 format %{ "vshiftq $dst,$src,$shift" %} 5062 ins_encode %{ 5063 int vector_len = vector_length_encoding(this); 5064 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 5065 %} 5066 ins_pipe( pipe_slow ); 5067 %} 5068 5069 // --------------------------------- AND -------------------------------------- 5070 5071 instruct vand(vec dst, vec src) %{ 5072 predicate(UseAVX == 0); 5073 match(Set dst (AndV dst src)); 5074 format %{ "pand $dst,$src\t! 
and vectors" %} 5075 ins_encode %{ 5076 __ pand($dst$$XMMRegister, $src$$XMMRegister); 5077 %} 5078 ins_pipe( pipe_slow ); 5079 %} 5080 5081 instruct vand_reg(vec dst, vec src1, vec src2) %{ 5082 predicate(UseAVX > 0); 5083 match(Set dst (AndV src1 src2)); 5084 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 5085 ins_encode %{ 5086 int vector_len = vector_length_encoding(this); 5087 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5088 %} 5089 ins_pipe( pipe_slow ); 5090 %} 5091 5092 instruct vand_mem(vec dst, vec src, memory mem) %{ 5093 predicate(UseAVX > 0); 5094 match(Set dst (AndV src (LoadVector mem))); 5095 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 5096 ins_encode %{ 5097 int vector_len = vector_length_encoding(this); 5098 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5099 %} 5100 ins_pipe( pipe_slow ); 5101 %} 5102 5103 // --------------------------------- OR --------------------------------------- 5104 5105 instruct vor(vec dst, vec src) %{ 5106 predicate(UseAVX == 0); 5107 match(Set dst (OrV dst src)); 5108 format %{ "por $dst,$src\t! or vectors" %} 5109 ins_encode %{ 5110 __ por($dst$$XMMRegister, $src$$XMMRegister); 5111 %} 5112 ins_pipe( pipe_slow ); 5113 %} 5114 5115 instruct vor_reg(vec dst, vec src1, vec src2) %{ 5116 predicate(UseAVX > 0); 5117 match(Set dst (OrV src1 src2)); 5118 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 5119 ins_encode %{ 5120 int vector_len = vector_length_encoding(this); 5121 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5122 %} 5123 ins_pipe( pipe_slow ); 5124 %} 5125 5126 instruct vor_mem(vec dst, vec src, memory mem) %{ 5127 predicate(UseAVX > 0); 5128 match(Set dst (OrV src (LoadVector mem))); 5129 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 5130 ins_encode %{ 5131 int vector_len = vector_length_encoding(this); 5132 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5133 %} 5134 ins_pipe( pipe_slow ); 5135 %} 5136 5137 // --------------------------------- XOR -------------------------------------- 5138 5139 instruct vxor(vec dst, vec src) %{ 5140 predicate(UseAVX == 0); 5141 match(Set dst (XorV dst src)); 5142 format %{ "pxor $dst,$src\t! xor vectors" %} 5143 ins_encode %{ 5144 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5145 %} 5146 ins_pipe( pipe_slow ); 5147 %} 5148 5149 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 5150 predicate(UseAVX > 0); 5151 match(Set dst (XorV src1 src2)); 5152 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 5153 ins_encode %{ 5154 int vector_len = vector_length_encoding(this); 5155 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5156 %} 5157 ins_pipe( pipe_slow ); 5158 %} 5159 5160 instruct vxor_mem(vec dst, vec src, memory mem) %{ 5161 predicate(UseAVX > 0); 5162 match(Set dst (XorV src (LoadVector mem))); 5163 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors" %} 5164 ins_encode %{ 5165 int vector_len = vector_length_encoding(this); 5166 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5167 %} 5168 ins_pipe( pipe_slow ); 5169 %} 5170 5171 // --------------------------------- ABS -------------------------------------- 5172 // a = |a| 5173 instruct vabsB_reg(vec dst, vec src) %{ 5174 match(Set dst (AbsVB src)); 5175 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 5176 ins_encode %{ 5177 uint vlen = vector_length(this); 5178 if (vlen <= 16) { 5179 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 5180 } else { 5181 int vlen_enc = vector_length_encoding(this); 5182 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5183 } 5184 %} 5185 ins_pipe( pipe_slow ); 5186 %} 5187 5188 instruct vabsS_reg(vec dst, vec src) %{ 5189 match(Set dst (AbsVS src)); 5190 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 5191 ins_encode %{ 5192 uint vlen = vector_length(this); 5193 if (vlen <= 8) { 5194 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 5195 } else { 5196 int vlen_enc = vector_length_encoding(this); 5197 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5198 } 5199 %} 5200 ins_pipe( pipe_slow ); 5201 %} 5202 5203 instruct vabsI_reg(vec dst, vec src) %{ 5204 match(Set dst (AbsVI src)); 5205 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 5206 ins_encode %{ 5207 uint vlen = vector_length(this); 5208 if (vlen <= 4) { 5209 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 5210 } else { 5211 int vlen_enc = vector_length_encoding(this); 5212 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5213 } 5214 %} 5215 ins_pipe( pipe_slow ); 5216 %} 5217 5218 instruct vabsL_reg(vec dst, vec src) %{ 5219 match(Set dst (AbsVL src)); 5220 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 5221 ins_encode %{ 5222 assert(UseAVX > 2, "required"); 5223 int vector_len = vector_length_encoding(this); 5224 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 5225 %} 5226 ins_pipe( pipe_slow ); 5227 %} 5228 5229 // --------------------------------- ABSNEG -------------------------------------- 5230 5231 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 5232 predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F 5233 match(Set dst (AbsVF src)); 5234 match(Set dst (NegVF src)); 5235 effect(TEMP scratch); 5236 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 5237 ins_cost(150); 5238 ins_encode %{ 5239 int opcode = this->ideal_Opcode(); 5240 int vlen = vector_length(this); 5241 if (vlen == 2) { 5242 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 5243 } else { 5244 assert(vlen == 8 || vlen == 16, "required"); 5245 int vlen_enc = vector_length_encoding(this); 5246 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 5247 } 5248 %} 5249 ins_pipe( pipe_slow ); 5250 %} 5251 5252 instruct vabsneg4F(vec dst, rRegI scratch) %{ 5253 predicate(n->as_Vector()->length() == 4); 5254 match(Set dst (AbsVF dst)); 5255 match(Set dst (NegVF dst)); 5256 effect(TEMP scratch); 5257 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 5258 ins_cost(150); 5259 ins_encode %{ 5260 int opcode = this->ideal_Opcode(); 5261 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 5262 %} 5263 ins_pipe( pipe_slow ); 5264 %} 5265 5266 instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 5267 match(Set dst (AbsVD src)); 5268 match(Set dst (NegVD src)); 5269 

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcountI(vec dst, vec src) %{
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %}
  ins_encode %{
    assert(UsePopCountInstruction, "not enabled");

    int vector_len = vector_length_encoding(this);
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Rotation Operations ----------------------------------
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}