//
// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
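// Editorial example (not part of the original sources): the reg_def entries
// that follow are read against the template above.  For instance,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares a save-on-call float slot whose opcode encoding is 0 and whose
// VMReg is the first 32-bit word of xmm0, while XMM0b..XMM0p name the
// remaining fifteen 32-bit words of the same 512-bit register via
// xmm0->as_VMReg()->next(1) .. next(15).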
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM31 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i,
                   XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
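// (Editorial note, hedged: alloc_class groups registers into allocation
//  chunks for the register allocator; chunk1 above covers every XMM
//  sub-word while chunk2 below holds only RFLAGS, which keeps the
//  condition-code register ordered after all vector slots, as the comment
//  above requires.)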
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b,
                             XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b,
                           XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n,
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 %} 1101 1102 1103 //----------SOURCE BLOCK------------------------------------------------------- 1104 // This is a block of C++ code which provides values, functions, and 1105 // definitions necessary in the rest of the architecture description 1106 1107 source_hpp %{ 1108 // Header information of the source block. 1109 // Method declarations/definitions which are used outside 1110 // the ad-scope can conveniently be defined here. 1111 // 1112 // To keep related declarations/definitions/uses close together, 1113 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
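// A minimal sketch of that convention (illustrative names only, not taken
// from the original sources): a declaration such as
//   class ExampleImpl { public: static uint example_size(); };
// placed in this source_hpp block would have its matching definition, e.g.
//   uint ExampleImpl::example_size() { return 0; }
// placed in a source block further below.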
1114 1115 class NativeJump; 1116 1117 class CallStubImpl { 1118 1119 //-------------------------------------------------------------- 1120 //---< Used for optimization in Compile::shorten_branches >--- 1121 //-------------------------------------------------------------- 1122 1123 public: 1124 // Size of call trampoline stub. 1125 static uint size_call_trampoline() { 1126 return 0; // no call trampolines on this platform 1127 } 1128 1129 // number of relocations needed by a call trampoline stub 1130 static uint reloc_call_trampoline() { 1131 return 0; // no call trampolines on this platform 1132 } 1133 }; 1134 1135 class HandlerImpl { 1136 1137 public: 1138 1139 static int emit_exception_handler(CodeBuffer &cbuf); 1140 static int emit_deopt_handler(CodeBuffer& cbuf); 1141 1142 static uint size_exception_handler() { 1143 // NativeCall instruction size is the same as NativeJump. 1144 // exception handler starts out as a jump and can be patched to 1145 // a call by deoptimization. (4932387) 1146 // Note that this value is also credited (in output.cpp) to 1147 // the size of the code section. 1148 return NativeJump::instruction_size; 1149 } 1150 1151 #ifdef _LP64 1152 static uint size_deopt_handler() { 1153 // three 5 byte instructions plus one move for unreachable address. 1154 return 15+3; 1155 } 1156 #else 1157 static uint size_deopt_handler() { 1158 // NativeCall instruction size is the same as NativeJump. 1159 // exception handler starts out as a jump and can be patched to 1160 // a call by deoptimization. (4932387) 1161 // Note that this value is also credited (in output.cpp) to 1162 // the size of the code section. 1163 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1164 } 1165 #endif 1166 }; 1167 1168 class Node::PD { 1169 public: 1170 enum NodeFlags { 1171 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1172 _last_flag = Flag_intel_jcc_erratum 1173 }; 1174 }; 1175 1176 %} // end source_hpp 1177 1178 source %{ 1179 1180 #include "opto/addnode.hpp" 1181 #include "c2_intelJccErratum_x86.hpp" 1182 1183 void PhaseOutput::pd_perform_mach_node_analysis() { 1184 if (VM_Version::has_intel_jcc_erratum()) { 1185 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1186 _buf_sizes._code += extra_padding; 1187 } 1188 } 1189 1190 int MachNode::pd_alignment_required() const { 1191 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1192 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1193 return IntelJccErratum::largest_jcc_size() + 1; 1194 } else { 1195 return 1; 1196 } 1197 } 1198 1199 int MachNode::compute_padding(int current_offset) const { 1200 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1201 Compile* C = Compile::current(); 1202 PhaseOutput* output = C->output(); 1203 Block* block = output->block(); 1204 int index = output->index(); 1205 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1206 } else { 1207 return 0; 1208 } 1209 } 1210 1211 // Emit exception handler code. 1212 // Stuff framesize into a register and call a VM stub routine. 1213 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1214 1215 // Note that the code buffer's insts_mark is always relative to insts. 1216 // That's why we must use the macroassembler to generate a handler.
1217 C2_MacroAssembler _masm(&cbuf); 1218 address base = __ start_a_stub(size_exception_handler()); 1219 if (base == NULL) { 1220 ciEnv::current()->record_failure("CodeCache is full"); 1221 return 0; // CodeBuffer::expand failed 1222 } 1223 int offset = __ offset(); 1224 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1225 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1226 __ end_a_stub(); 1227 return offset; 1228 } 1229 1230 // Emit deopt handler code. 1231 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1232 1233 // Note that the code buffer's insts_mark is always relative to insts. 1234 // That's why we must use the macroassembler to generate a handler. 1235 C2_MacroAssembler _masm(&cbuf); 1236 address base = __ start_a_stub(size_deopt_handler()); 1237 if (base == NULL) { 1238 ciEnv::current()->record_failure("CodeCache is full"); 1239 return 0; // CodeBuffer::expand failed 1240 } 1241 int offset = __ offset(); 1242 1243 #ifdef _LP64 1244 address the_pc = (address) __ pc(); 1245 Label next; 1246 // push a "the_pc" on the stack without destroying any registers 1247 // as they all may be live. 1248 1249 // push address of "next" 1250 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1251 __ bind(next); 1252 // adjust it so it matches "the_pc" 1253 __ subptr(Address(rsp, 0), __ offset() - offset); 1254 #else 1255 InternalAddress here(__ pc()); 1256 __ pushptr(here.addr()); 1257 #endif 1258 1259 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1260 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1261 __ end_a_stub(); 1262 return offset; 1263 } 1264 1265 1266 //============================================================================= 1267 1268 // Float masks come from different places depending on platform. 
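// (Whatever their origin, the masks encode the usual IEEE-754 bit tricks used
// by the abs/neg instructs further below: abs clears the sign bit by AND-ing
// with 0x7fffffff / 0x7fffffffffffffff, while neg flips it by XOR-ing with
// 0x80000000 / 0x8000000000000000.)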
1269 #ifdef _LP64 1270 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1271 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1272 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1273 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1274 #else 1275 static address float_signmask() { return (address)float_signmask_pool; } 1276 static address float_signflip() { return (address)float_signflip_pool; } 1277 static address double_signmask() { return (address)double_signmask_pool; } 1278 static address double_signflip() { return (address)double_signflip_pool; } 1279 #endif 1280 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1281 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1282 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1283 1284 //============================================================================= 1285 const bool Matcher::match_rule_supported(int opcode) { 1286 if (!has_match_rule(opcode)) { 1287 return false; // no match rule present 1288 } 1289 switch (opcode) { 1290 case Op_AbsVL: 1291 if (UseAVX < 3) { 1292 return false; 1293 } 1294 break; 1295 case Op_PopCountI: 1296 case Op_PopCountL: 1297 if (!UsePopCountInstruction) { 1298 return false; 1299 } 1300 break; 1301 case Op_PopCountVI: 1302 if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) { 1303 return false; 1304 } 1305 break; 1306 case Op_MulVI: 1307 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1308 return false; 1309 } 1310 break; 1311 case Op_MulVL: 1312 case Op_MulReductionVL: 1313 if (VM_Version::supports_avx512dq() == false) { 1314 return false; 1315 } 1316 break; 1317 case Op_AbsVB: 1318 case Op_AbsVS: 1319 case Op_AbsVI: 1320 case Op_AddReductionVI: 1321 case Op_AndReductionV: 1322 case Op_OrReductionV: 1323 case Op_XorReductionV: 1324 if (UseSSE < 3) { // requires at least SSSE3 1325 return false; 1326 } 1327 break; 1328 case Op_MulReductionVI: 1329 if (UseSSE < 4) { // requires at least SSE4 1330 return false; 1331 } 1332 break; 1333 case Op_SqrtVD: 1334 case Op_SqrtVF: 1335 if (UseAVX < 1) { // enabled for AVX only 1336 return false; 1337 } 1338 break; 1339 case Op_CompareAndSwapL: 1340 #ifdef _LP64 1341 case Op_CompareAndSwapP: 1342 #endif 1343 if (!VM_Version::supports_cx8()) { 1344 return false; 1345 } 1346 break; 1347 case Op_CMoveVF: 1348 case Op_CMoveVD: 1349 if (UseAVX < 1 || UseAVX > 2) { 1350 return false; 1351 } 1352 break; 1353 case Op_StrIndexOf: 1354 if (!UseSSE42Intrinsics) { 1355 return false; 1356 } 1357 break; 1358 case Op_StrIndexOfChar: 1359 if (!UseSSE42Intrinsics) { 1360 return false; 1361 } 1362 break; 1363 case Op_OnSpinWait: 1364 if (VM_Version::supports_on_spin_wait() == false) { 1365 return false; 1366 } 1367 break; 1368 case Op_MulVB: 1369 case Op_LShiftVB: 1370 case Op_RShiftVB: 1371 case Op_URShiftVB: 1372 if (UseSSE < 4) { 1373 return false; 1374 } 1375 break; 1376 #ifdef _LP64 1377 case Op_MaxD: 1378 case Op_MaxF: 1379 case Op_MinD: 1380 case Op_MinF: 1381 if (UseAVX < 1) { // enabled for AVX only 1382 return false; 1383 } 1384 break; 1385 #endif 1386 case Op_CacheWB: 1387 case Op_CacheWBPreSync: 1388 case Op_CacheWBPostSync: 1389 if (!VM_Version::supports_data_cache_line_flush()) { 1390 return false; 1391 } 1392 break; 1393 case Op_RoundDoubleMode: 1394 if (UseSSE < 
4) { 1395 return false; 1396 } 1397 break; 1398 case Op_RoundDoubleModeV: 1399 if (VM_Version::supports_avx() == false) { 1400 return false; // 128bit vroundpd is not available 1401 } 1402 break; 1403 case Op_MacroLogicV: 1404 if (UseAVX < 3 || !UseVectorMacroLogic) { 1405 return false; 1406 } 1407 break; 1408 #ifndef _LP64 1409 case Op_AddReductionVF: 1410 case Op_AddReductionVD: 1411 case Op_MulReductionVF: 1412 case Op_MulReductionVD: 1413 if (UseSSE < 1) { // requires at least SSE 1414 return false; 1415 } 1416 break; 1417 case Op_MulAddVS2VI: 1418 case Op_RShiftVL: 1419 case Op_AbsVD: 1420 case Op_NegVD: 1421 if (UseSSE < 2) { 1422 return false; 1423 } 1424 break; 1425 #endif // !LP64 1426 } 1427 return true; // Match rules are supported by default. 1428 } 1429 1430 //------------------------------------------------------------------------ 1431 1432 // Identify extra cases that we might want to provide match rules for vector nodes and 1433 // other intrinsics guarded with vector length (vlen) and element type (bt). 1434 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1435 if (!match_rule_supported(opcode)) { 1436 return false; 1437 } 1438 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1439 // * SSE2 supports 128bit vectors for all types; 1440 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1441 // * AVX2 supports 256bit vectors for all types; 1442 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1443 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1444 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1445 // And MaxVectorSize is taken into account as well. 1446 if (!vector_size_supported(bt, vlen)) { 1447 return false; 1448 } 1449 // Special cases which require vector length follow: 1450 // * implementation limitations 1451 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1452 // * 128bit vroundpd instruction is present only in AVX1 1453 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1454 switch (opcode) { 1455 case Op_AbsVF: 1456 case Op_NegVF: 1457 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1458 return false; // 512bit vandps and vxorps are not available 1459 } 1460 break; 1461 case Op_AbsVD: 1462 case Op_NegVD: 1463 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1464 return false; // 512bit vandpd and vxorpd are not available 1465 } 1466 break; 1467 case Op_CMoveVF: 1468 if (vlen != 8) { 1469 return false; // implementation limitation (only vcmov8F_reg is present) 1470 } 1471 break; 1472 case Op_RotateRightV: 1473 case Op_RotateLeftV: 1474 case Op_MacroLogicV: 1475 if (!VM_Version::supports_evex() || 1476 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1477 return false; 1478 } 1479 break; 1480 case Op_CMoveVD: 1481 if (vlen != 4) { 1482 return false; // implementation limitation (only vcmov4D_reg is present) 1483 } 1484 break; 1485 } 1486 return true; // Per default match rules are supported. 1487 } 1488 1489 // x86 supports generic vector operands: vec and legVec. 
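// (These are the dummy 'vec'/'legVec' operands defined in the OPERANDS section
// below; pd_specialize_generic_vector_operand() rewrites them into the concrete
// vec[SDXYZ]/legVec[SDXYZ] operands during the post-selection pass.)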
1490 const bool Matcher::supports_generic_vector_operands = true; 1491 1492 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 1493 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 1494 bool legacy = (generic_opnd->opcode() == LEGVEC); 1495 if (!VM_Version::supports_avx512vlbwdq() && // KNL 1496 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 1497 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 1498 return new legVecZOper(); 1499 } 1500 if (legacy) { 1501 switch (ideal_reg) { 1502 case Op_VecS: return new legVecSOper(); 1503 case Op_VecD: return new legVecDOper(); 1504 case Op_VecX: return new legVecXOper(); 1505 case Op_VecY: return new legVecYOper(); 1506 case Op_VecZ: return new legVecZOper(); 1507 } 1508 } else { 1509 switch (ideal_reg) { 1510 case Op_VecS: return new vecSOper(); 1511 case Op_VecD: return new vecDOper(); 1512 case Op_VecX: return new vecXOper(); 1513 case Op_VecY: return new vecYOper(); 1514 case Op_VecZ: return new vecZOper(); 1515 } 1516 } 1517 ShouldNotReachHere(); 1518 return NULL; 1519 } 1520 1521 bool Matcher::is_generic_reg2reg_move(MachNode* m) { 1522 switch (m->rule()) { 1523 case MoveVec2Leg_rule: 1524 case MoveLeg2Vec_rule: 1525 return true; 1526 default: 1527 return false; 1528 } 1529 } 1530 1531 bool Matcher::is_generic_vector(MachOper* opnd) { 1532 switch (opnd->opcode()) { 1533 case VEC: 1534 case LEGVEC: 1535 return true; 1536 default: 1537 return false; 1538 } 1539 } 1540 1541 //------------------------------------------------------------------------ 1542 1543 const bool Matcher::has_predicated_vectors(void) { 1544 bool ret_value = false; 1545 if (UseAVX > 2) { 1546 ret_value = VM_Version::supports_avx512vl(); 1547 } 1548 1549 return ret_value; 1550 } 1551 1552 const int Matcher::float_pressure(int default_pressure_threshold) { 1553 int float_pressure_threshold = default_pressure_threshold; 1554 #ifdef _LP64 1555 if (UseAVX > 2) { 1556 // Increase pressure threshold on machines with AVX3 which have 1557 // 2x more XMM registers. 1558 float_pressure_threshold = default_pressure_threshold * 2; 1559 } 1560 #endif 1561 return float_pressure_threshold; 1562 } 1563 1564 // Max vector size in bytes. 0 if not supported. 1565 const int Matcher::vector_width_in_bytes(BasicType bt) { 1566 assert(is_java_primitive(bt), "only primitive type vectors"); 1567 if (UseSSE < 2) return 0; 1568 // SSE2 supports 128bit vectors for all types. 1569 // AVX2 supports 256bit vectors for all types. 1570 // AVX2/EVEX supports 512bit vectors for all types. 1571 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1572 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1573 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1574 size = (UseAVX > 2) ? 64 : 32; 1575 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 1576 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 1577 // Use flag to limit vector size. 1578 size = MIN2(size,(int)MaxVectorSize); 1579 // Minimum 2 values in vector (or 4 for bytes). 
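// Worked example (illustrative arithmetic only): with UseAVX == 2 and
// MaxVectorSize not limiting, the computed size for T_INT is 32 bytes, so
// max_vector_size(T_INT) below yields 8 elements; with UseSSE >= 2 and
// UseAVX == 0 it is 16 bytes, i.e. 4 ints.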
1580 switch (bt) { 1581 case T_DOUBLE: 1582 case T_LONG: 1583 if (size < 16) return 0; 1584 break; 1585 case T_FLOAT: 1586 case T_INT: 1587 if (size < 8) return 0; 1588 break; 1589 case T_BOOLEAN: 1590 if (size < 4) return 0; 1591 break; 1592 case T_CHAR: 1593 if (size < 4) return 0; 1594 break; 1595 case T_BYTE: 1596 if (size < 4) return 0; 1597 break; 1598 case T_SHORT: 1599 if (size < 4) return 0; 1600 break; 1601 default: 1602 ShouldNotReachHere(); 1603 } 1604 return size; 1605 } 1606 1607 // Limits on vector size (number of elements) loaded into vector. 1608 const int Matcher::max_vector_size(const BasicType bt) { 1609 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1610 } 1611 const int Matcher::min_vector_size(const BasicType bt) { 1612 int max_size = max_vector_size(bt); 1613 // Min size which can be loaded into vector is 4 bytes. 1614 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1615 return MIN2(size,max_size); 1616 } 1617 1618 const bool Matcher::supports_scalable_vector() { 1619 return false; 1620 } 1621 1622 const int Matcher::scalable_vector_reg_size(const BasicType bt) { 1623 return -1; 1624 } 1625 1626 // Vector ideal reg corresponding to specified size in bytes 1627 const uint Matcher::vector_ideal_reg(int size) { 1628 assert(MaxVectorSize >= size, ""); 1629 switch(size) { 1630 case 4: return Op_VecS; 1631 case 8: return Op_VecD; 1632 case 16: return Op_VecX; 1633 case 32: return Op_VecY; 1634 case 64: return Op_VecZ; 1635 } 1636 ShouldNotReachHere(); 1637 return 0; 1638 } 1639 1640 // x86 supports misaligned vectors store/load. 1641 const bool Matcher::misaligned_vectors_ok() { 1642 return true; 1643 } 1644 1645 // x86 AES instructions are compatible with SunJCE expanded 1646 // keys, hence we do not need to pass the original key to stubs 1647 const bool Matcher::pass_original_key_for_aes() { 1648 return false; 1649 } 1650 1651 1652 const bool Matcher::convi2l_type_required = true; 1653 1654 // Check for shift by small constant as well 1655 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1656 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1657 shift->in(2)->get_int() <= 3 && 1658 // Are there other uses besides address expressions? 1659 !matcher->is_visited(shift)) { 1660 address_visited.set(shift->_idx); // Flag as address_visited 1661 mstack.push(shift->in(2), Matcher::Visit); 1662 Node *conv = shift->in(1); 1663 #ifdef _LP64 1664 // Allow Matcher to match the rule which bypass 1665 // ConvI2L operation for an array index on LP64 1666 // if the index value is positive. 1667 if (conv->Opcode() == Op_ConvI2L && 1668 conv->as_Type()->type()->is_long()->_lo >= 0 && 1669 // Are there other uses besides address expressions? 1670 !matcher->is_visited(conv)) { 1671 address_visited.set(conv->_idx); // Flag as address_visited 1672 mstack.push(conv->in(1), Matcher::Pre_Visit); 1673 } else 1674 #endif 1675 mstack.push(conv, Matcher::Pre_Visit); 1676 return true; 1677 } 1678 return false; 1679 } 1680 1681 // This function identifies sub-graphs in which a 'load' node is 1682 // input to two different nodes, and such that it can be matched 1683 // with BMI instructions like blsi, blsr, etc. 1684 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 1685 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 1686 // refers to the same node. 
1687 // 1688 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 1689 // This is a temporary solution until we make DAGs expressible in ADL. 1690 template<typename ConType> 1691 class FusedPatternMatcher { 1692 Node* _op1_node; 1693 Node* _mop_node; 1694 int _con_op; 1695 1696 static int match_next(Node* n, int next_op, int next_op_idx) { 1697 if (n->in(1) == NULL || n->in(2) == NULL) { 1698 return -1; 1699 } 1700 1701 if (next_op_idx == -1) { // n is commutative, try rotations 1702 if (n->in(1)->Opcode() == next_op) { 1703 return 1; 1704 } else if (n->in(2)->Opcode() == next_op) { 1705 return 2; 1706 } 1707 } else { 1708 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 1709 if (n->in(next_op_idx)->Opcode() == next_op) { 1710 return next_op_idx; 1711 } 1712 } 1713 return -1; 1714 } 1715 1716 public: 1717 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 1718 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 1719 1720 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 1721 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 1722 typename ConType::NativeType con_value) { 1723 if (_op1_node->Opcode() != op1) { 1724 return false; 1725 } 1726 if (_mop_node->outcnt() > 2) { 1727 return false; 1728 } 1729 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 1730 if (op1_op2_idx == -1) { 1731 return false; 1732 } 1733 // Memory operation must be the other edge 1734 int op1_mop_idx = (op1_op2_idx & 1) + 1; 1735 1736 // Check that the mop node is really what we want 1737 if (_op1_node->in(op1_mop_idx) == _mop_node) { 1738 Node* op2_node = _op1_node->in(op1_op2_idx); 1739 if (op2_node->outcnt() > 1) { 1740 return false; 1741 } 1742 assert(op2_node->Opcode() == op2, "Should be"); 1743 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 1744 if (op2_con_idx == -1) { 1745 return false; 1746 } 1747 // Memory operation must be the other edge 1748 int op2_mop_idx = (op2_con_idx & 1) + 1; 1749 // Check that the memory operation is the same node 1750 if (op2_node->in(op2_mop_idx) == _mop_node) { 1751 // Now check the constant 1752 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 1753 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 1754 return true; 1755 } 1756 } 1757 } 1758 return false; 1759 } 1760 }; 1761 1762 static bool is_bmi_pattern(Node* n, Node* m) { 1763 assert(UseBMI1Instructions, "sanity"); 1764 if (n != NULL && m != NULL) { 1765 if (m->Opcode() == Op_LoadI) { 1766 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 1767 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 1768 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 1769 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 1770 } else if (m->Opcode() == Op_LoadL) { 1771 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 1772 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 1773 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 1774 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 1775 } 1776 } 1777 return false; 1778 } 1779 1780 // Should the matcher clone input 'm' of node 'n'? 1781 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 1782 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
1783 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 1784 mstack.push(m, Visit); 1785 return true; 1786 } 1787 return false; 1788 } 1789 1790 // Should the Matcher clone shifts on addressing modes, expecting them 1791 // to be subsumed into complex addressing expressions or compute them 1792 // into registers? 1793 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1794 Node *off = m->in(AddPNode::Offset); 1795 if (off->is_Con()) { 1796 address_visited.test_set(m->_idx); // Flag as address_visited 1797 Node *adr = m->in(AddPNode::Address); 1798 1799 // Intel can handle 2 adds in addressing mode 1800 // AtomicAdd is not an addressing expression. 1801 // Cheap to find it by looking for screwy base. 1802 if (adr->is_AddP() && 1803 !adr->in(AddPNode::Base)->is_top() && 1804 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 1805 // Are there other uses besides address expressions? 1806 !is_visited(adr)) { 1807 address_visited.set(adr->_idx); // Flag as address_visited 1808 Node *shift = adr->in(AddPNode::Offset); 1809 if (!clone_shift(shift, this, mstack, address_visited)) { 1810 mstack.push(shift, Pre_Visit); 1811 } 1812 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1813 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1814 } else { 1815 mstack.push(adr, Pre_Visit); 1816 } 1817 1818 // Clone X+offset as it also folds into most addressing expressions 1819 mstack.push(off, Visit); 1820 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1821 return true; 1822 } else if (clone_shift(off, this, mstack, address_visited)) { 1823 address_visited.test_set(m->_idx); // Flag as address_visited 1824 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1825 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1826 return true; 1827 } 1828 return false; 1829 } 1830 1831 void Compile::reshape_address(AddPNode* addp) { 1832 } 1833 1834 static inline uint vector_length(const MachNode* n) { 1835 const TypeVect* vt = n->bottom_type()->is_vect(); 1836 return vt->length(); 1837 } 1838 1839 static inline uint vector_length(const MachNode* use, MachOper* opnd) { 1840 uint def_idx = use->operand_index(opnd); 1841 Node* def = use->in(def_idx); 1842 return def->bottom_type()->is_vect()->length(); 1843 } 1844 1845 static inline uint vector_length_in_bytes(const MachNode* n) { 1846 const TypeVect* vt = n->bottom_type()->is_vect(); 1847 return vt->length_in_bytes(); 1848 } 1849 1850 static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { 1851 uint def_idx = use->operand_index(opnd); 1852 Node* def = use->in(def_idx); 1853 return def->bottom_type()->is_vect()->length_in_bytes(); 1854 } 1855 1856 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) { 1857 switch(vector_length_in_bytes(n)) { 1858 case 4: // fall-through 1859 case 8: // fall-through 1860 case 16: return Assembler::AVX_128bit; 1861 case 32: return Assembler::AVX_256bit; 1862 case 64: return Assembler::AVX_512bit; 1863 1864 default: { 1865 ShouldNotReachHere(); 1866 return Assembler::AVX_NoVec; 1867 } 1868 } 1869 } 1870 1871 // Helper methods for MachSpillCopyNode::implementation(). 1872 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1873 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1874 // In 64-bit VM size calculation is very complex. Emitting instructions 1875 // into scratch buffer is used to get size in 64-bit VM. 
1876 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1877 assert(ireg == Op_VecS || // 32bit vector 1878 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1879 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1880 "no non-adjacent vector moves" ); 1881 if (cbuf) { 1882 C2_MacroAssembler _masm(cbuf); 1883 int offset = __ offset(); 1884 switch (ireg) { 1885 case Op_VecS: // copy whole register 1886 case Op_VecD: 1887 case Op_VecX: 1888 #ifndef _LP64 1889 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1890 #else 1891 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1892 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1893 } else { 1894 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1895 } 1896 #endif 1897 break; 1898 case Op_VecY: 1899 #ifndef _LP64 1900 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1901 #else 1902 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1903 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1904 } else { 1905 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1906 } 1907 #endif 1908 break; 1909 case Op_VecZ: 1910 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1911 break; 1912 default: 1913 ShouldNotReachHere(); 1914 } 1915 int size = __ offset() - offset; 1916 #ifdef ASSERT 1917 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1918 assert(!do_size || size == 4, "incorrect size calculation"); 1919 #endif 1920 return size; 1921 #ifndef PRODUCT 1922 } else if (!do_size) { 1923 switch (ireg) { 1924 case Op_VecS: 1925 case Op_VecD: 1926 case Op_VecX: 1927 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1928 break; 1929 case Op_VecY: 1930 case Op_VecZ: 1931 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1932 break; 1933 default: 1934 ShouldNotReachHere(); 1935 } 1936 #endif 1937 } 1938 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1939 return (UseAVX > 2) ? 6 : 4; 1940 } 1941 1942 int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1943 int stack_offset, int reg, uint ireg, outputStream* st) { 1944 // In 64-bit VM size calculation is very complex. Emitting instructions 1945 // into scratch buffer is used to get size in 64-bit VM.
1946 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1947 if (cbuf) { 1948 C2_MacroAssembler _masm(cbuf); 1949 int offset = __ offset(); 1950 if (is_load) { 1951 switch (ireg) { 1952 case Op_VecS: 1953 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1954 break; 1955 case Op_VecD: 1956 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1957 break; 1958 case Op_VecX: 1959 #ifndef _LP64 1960 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1961 #else 1962 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1963 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1964 } else { 1965 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1966 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1967 } 1968 #endif 1969 break; 1970 case Op_VecY: 1971 #ifndef _LP64 1972 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1973 #else 1974 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1975 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1976 } else { 1977 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1978 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1979 } 1980 #endif 1981 break; 1982 case Op_VecZ: 1983 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1984 break; 1985 default: 1986 ShouldNotReachHere(); 1987 } 1988 } else { // store 1989 switch (ireg) { 1990 case Op_VecS: 1991 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1992 break; 1993 case Op_VecD: 1994 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1995 break; 1996 case Op_VecX: 1997 #ifndef _LP64 1998 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1999 #else 2000 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2001 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2002 } 2003 else { 2004 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2005 } 2006 #endif 2007 break; 2008 case Op_VecY: 2009 #ifndef _LP64 2010 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2011 #else 2012 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2013 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2014 } 2015 else { 2016 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2017 } 2018 #endif 2019 break; 2020 case Op_VecZ: 2021 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2022 break; 2023 default: 2024 ShouldNotReachHere(); 2025 } 2026 } 2027 int size = __ offset() - offset; 2028 #ifdef ASSERT 2029 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 2030 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
2031 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 2032 #endif 2033 return size; 2034 #ifndef PRODUCT 2035 } else if (!do_size) { 2036 if (is_load) { 2037 switch (ireg) { 2038 case Op_VecS: 2039 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2040 break; 2041 case Op_VecD: 2042 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2043 break; 2044 case Op_VecX: 2045 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2046 break; 2047 case Op_VecY: 2048 case Op_VecZ: 2049 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2050 break; 2051 default: 2052 ShouldNotReachHere(); 2053 } 2054 } else { // store 2055 switch (ireg) { 2056 case Op_VecS: 2057 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2058 break; 2059 case Op_VecD: 2060 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2061 break; 2062 case Op_VecX: 2063 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2064 break; 2065 case Op_VecY: 2066 case Op_VecZ: 2067 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2068 break; 2069 default: 2070 ShouldNotReachHere(); 2071 } 2072 } 2073 #endif 2074 } 2075 bool is_single_byte = false; 2076 int vec_len = 0; 2077 if ((UseAVX > 2) && (stack_offset != 0)) { 2078 int tuple_type = Assembler::EVEX_FVM; 2079 int input_size = Assembler::EVEX_32bit; 2080 switch (ireg) { 2081 case Op_VecS: 2082 tuple_type = Assembler::EVEX_T1S; 2083 break; 2084 case Op_VecD: 2085 tuple_type = Assembler::EVEX_T1S; 2086 input_size = Assembler::EVEX_64bit; 2087 break; 2088 case Op_VecX: 2089 break; 2090 case Op_VecY: 2091 vec_len = 1; 2092 break; 2093 case Op_VecZ: 2094 vec_len = 2; 2095 break; 2096 } 2097 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2098 } 2099 int offset_size = 0; 2100 int size = 5; 2101 if (UseAVX > 2) { 2102 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2103 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2104 size += 2; // Need an additional two bytes for EVEX encoding 2105 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2106 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2107 } else { 2108 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2109 size += 2; // Need an additional two bytes for EVEX encoding 2110 } 2111 } else { 2112 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2113 } 2114 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2115 return size+offset_size; 2116 } 2117 2118 static inline jint replicate4_imm(int con, int width) { 2119 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2120 assert(width == 1 || width == 2, "only byte or short types here"); 2121 int bit_width = width * 8; 2122 jint val = con; 2123 val &= (1 << bit_width) - 1; // mask off sign bits 2124 while(bit_width < 32) { 2125 val |= (val << bit_width); 2126 bit_width <<= 1; 2127 } 2128 return val; 2129 } 2130 2131 static inline jlong replicate8_imm(int con, int width) { 2132 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
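// Worked example (illustrative only): replicate8_imm(0x1F, 1) masks the value
// to 0x1F and doubles it up 8 -> 16 -> 32 -> 64 bits, giving
// 0x1F1F1F1F1F1F1F1F; replicate4_imm above does the same but stops at 32 bits.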
2133 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2134 int bit_width = width * 8; 2135 jlong val = con; 2136 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2137 while(bit_width < 64) { 2138 val |= (val << bit_width); 2139 bit_width <<= 1; 2140 } 2141 return val; 2142 } 2143 2144 #ifndef PRODUCT 2145 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2146 st->print("nop \t# %d bytes pad for loops and calls", _count); 2147 } 2148 #endif 2149 2150 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2151 C2_MacroAssembler _masm(&cbuf); 2152 __ nop(_count); 2153 } 2154 2155 uint MachNopNode::size(PhaseRegAlloc*) const { 2156 return _count; 2157 } 2158 2159 #ifndef PRODUCT 2160 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2161 st->print("# breakpoint"); 2162 } 2163 #endif 2164 2165 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2166 C2_MacroAssembler _masm(&cbuf); 2167 __ int3(); 2168 } 2169 2170 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2171 return MachNode::size(ra_); 2172 } 2173 2174 %} 2175 2176 encode %{ 2177 2178 enc_class call_epilog %{ 2179 if (VerifyStackAtCalls) { 2180 // Check that stack depth is unchanged: find majik cookie on stack 2181 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2182 C2_MacroAssembler _masm(&cbuf); 2183 Label L; 2184 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2185 __ jccb(Assembler::equal, L); 2186 // Die if stack mismatch 2187 __ int3(); 2188 __ bind(L); 2189 } 2190 %} 2191 2192 %} 2193 2194 2195 //----------OPERANDS----------------------------------------------------------- 2196 // Operand definitions must precede instruction definitions for correct parsing 2197 // in the ADLC because operands constitute user defined types which are used in 2198 // instruction definitions. 2199 2200 // Vectors 2201 2202 // Dummy generic vector class. Should be used for all vector operands. 2203 // Replaced with vec[SDXYZ] during post-selection pass. 2204 operand vec() %{ 2205 constraint(ALLOC_IN_RC(dynamic)); 2206 match(VecX); 2207 match(VecY); 2208 match(VecZ); 2209 match(VecS); 2210 match(VecD); 2211 2212 format %{ %} 2213 interface(REG_INTER); 2214 %} 2215 2216 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2217 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2218 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2219 // runtime code generation via reg_class_dynamic. 2220 operand legVec() %{ 2221 constraint(ALLOC_IN_RC(dynamic)); 2222 match(VecX); 2223 match(VecY); 2224 match(VecZ); 2225 match(VecS); 2226 match(VecD); 2227 2228 format %{ %} 2229 interface(REG_INTER); 2230 %} 2231 2232 // Replaces vec during post-selection cleanup. See above. 2233 operand vecS() %{ 2234 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2235 match(VecS); 2236 2237 format %{ %} 2238 interface(REG_INTER); 2239 %} 2240 2241 // Replaces legVec during post-selection cleanup. See above. 2242 operand legVecS() %{ 2243 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2244 match(VecS); 2245 2246 format %{ %} 2247 interface(REG_INTER); 2248 %} 2249 2250 // Replaces vec during post-selection cleanup. See above. 
2251 operand vecD() %{ 2252 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2253 match(VecD); 2254 2255 format %{ %} 2256 interface(REG_INTER); 2257 %} 2258 2259 // Replaces legVec during post-selection cleanup. See above. 2260 operand legVecD() %{ 2261 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2262 match(VecD); 2263 2264 format %{ %} 2265 interface(REG_INTER); 2266 %} 2267 2268 // Replaces vec during post-selection cleanup. See above. 2269 operand vecX() %{ 2270 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2271 match(VecX); 2272 2273 format %{ %} 2274 interface(REG_INTER); 2275 %} 2276 2277 // Replaces legVec during post-selection cleanup. See above. 2278 operand legVecX() %{ 2279 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2280 match(VecX); 2281 2282 format %{ %} 2283 interface(REG_INTER); 2284 %} 2285 2286 // Replaces vec during post-selection cleanup. See above. 2287 operand vecY() %{ 2288 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2289 match(VecY); 2290 2291 format %{ %} 2292 interface(REG_INTER); 2293 %} 2294 2295 // Replaces legVec during post-selection cleanup. See above. 2296 operand legVecY() %{ 2297 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2298 match(VecY); 2299 2300 format %{ %} 2301 interface(REG_INTER); 2302 %} 2303 2304 // Replaces vec during post-selection cleanup. See above. 2305 operand vecZ() %{ 2306 constraint(ALLOC_IN_RC(vectorz_reg)); 2307 match(VecZ); 2308 2309 format %{ %} 2310 interface(REG_INTER); 2311 %} 2312 2313 // Replaces legVec during post-selection cleanup. See above. 2314 operand legVecZ() %{ 2315 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2316 match(VecZ); 2317 2318 format %{ %} 2319 interface(REG_INTER); 2320 %} 2321 2322 // Comparison Code for FP conditional move 2323 operand cmpOp_vcmppd() %{ 2324 match(Bool); 2325 2326 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2327 n->as_Bool()->_test._test != BoolTest::no_overflow); 2328 format %{ "" %} 2329 interface(COND_INTER) %{ 2330 equal (0x0, "eq"); 2331 less (0x1, "lt"); 2332 less_equal (0x2, "le"); 2333 not_equal (0xC, "ne"); 2334 greater_equal(0xD, "ge"); 2335 greater (0xE, "gt"); 2336 //TODO cannot compile (adlc breaks) without two next lines with error: 2337 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2338 // equal' for overflow. 
2339 overflow (0x20, "o"); // not really supported by the instruction 2340 no_overflow (0x21, "no"); // not really supported by the instruction 2341 %} 2342 %} 2343 2344 2345 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2346 2347 // ============================================================================ 2348 2349 instruct ShouldNotReachHere() %{ 2350 match(Halt); 2351 format %{ "stop\t# ShouldNotReachHere" %} 2352 ins_encode %{ 2353 if (is_reachable()) { 2354 __ stop(_halt_reason); 2355 } 2356 %} 2357 ins_pipe(pipe_slow); 2358 %} 2359 2360 // =================================EVEX special=============================== 2361 2362 instruct setMask(rRegI dst, rRegI src) %{ 2363 predicate(Matcher::has_predicated_vectors()); 2364 match(Set dst (SetVectMaskI src)); 2365 effect(TEMP dst); 2366 format %{ "setvectmask $dst, $src" %} 2367 ins_encode %{ 2368 __ setvectmask($dst$$Register, $src$$Register); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 // ============================================================================ 2374 2375 instruct addF_reg(regF dst, regF src) %{ 2376 predicate((UseSSE>=1) && (UseAVX == 0)); 2377 match(Set dst (AddF dst src)); 2378 2379 format %{ "addss $dst, $src" %} 2380 ins_cost(150); 2381 ins_encode %{ 2382 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2383 %} 2384 ins_pipe(pipe_slow); 2385 %} 2386 2387 instruct addF_mem(regF dst, memory src) %{ 2388 predicate((UseSSE>=1) && (UseAVX == 0)); 2389 match(Set dst (AddF dst (LoadF src))); 2390 2391 format %{ "addss $dst, $src" %} 2392 ins_cost(150); 2393 ins_encode %{ 2394 __ addss($dst$$XMMRegister, $src$$Address); 2395 %} 2396 ins_pipe(pipe_slow); 2397 %} 2398 2399 instruct addF_imm(regF dst, immF con) %{ 2400 predicate((UseSSE>=1) && (UseAVX == 0)); 2401 match(Set dst (AddF dst con)); 2402 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2403 ins_cost(150); 2404 ins_encode %{ 2405 __ addss($dst$$XMMRegister, $constantaddress($con)); 2406 %} 2407 ins_pipe(pipe_slow); 2408 %} 2409 2410 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2411 predicate(UseAVX > 0); 2412 match(Set dst (AddF src1 src2)); 2413 2414 format %{ "vaddss $dst, $src1, $src2" %} 2415 ins_cost(150); 2416 ins_encode %{ 2417 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2418 %} 2419 ins_pipe(pipe_slow); 2420 %} 2421 2422 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2423 predicate(UseAVX > 0); 2424 match(Set dst (AddF src1 (LoadF src2))); 2425 2426 format %{ "vaddss $dst, $src1, $src2" %} 2427 ins_cost(150); 2428 ins_encode %{ 2429 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2430 %} 2431 ins_pipe(pipe_slow); 2432 %} 2433 2434 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2435 predicate(UseAVX > 0); 2436 match(Set dst (AddF src con)); 2437 2438 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2439 ins_cost(150); 2440 ins_encode %{ 2441 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2442 %} 2443 ins_pipe(pipe_slow); 2444 %} 2445 2446 instruct addD_reg(regD dst, regD src) %{ 2447 predicate((UseSSE>=2) && (UseAVX == 0)); 2448 match(Set dst (AddD dst src)); 2449 2450 format %{ "addsd $dst, $src" %} 2451 ins_cost(150); 2452 ins_encode %{ 2453 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2454 %} 2455 ins_pipe(pipe_slow); 2456 %} 2457 2458 instruct addD_mem(regD dst, memory src) %{ 2459 predicate((UseSSE>=2) && (UseAVX == 0)); 2460 
match(Set dst (AddD dst (LoadD src))); 2461 2462 format %{ "addsd $dst, $src" %} 2463 ins_cost(150); 2464 ins_encode %{ 2465 __ addsd($dst$$XMMRegister, $src$$Address); 2466 %} 2467 ins_pipe(pipe_slow); 2468 %} 2469 2470 instruct addD_imm(regD dst, immD con) %{ 2471 predicate((UseSSE>=2) && (UseAVX == 0)); 2472 match(Set dst (AddD dst con)); 2473 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2474 ins_cost(150); 2475 ins_encode %{ 2476 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2477 %} 2478 ins_pipe(pipe_slow); 2479 %} 2480 2481 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2482 predicate(UseAVX > 0); 2483 match(Set dst (AddD src1 src2)); 2484 2485 format %{ "vaddsd $dst, $src1, $src2" %} 2486 ins_cost(150); 2487 ins_encode %{ 2488 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2489 %} 2490 ins_pipe(pipe_slow); 2491 %} 2492 2493 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2494 predicate(UseAVX > 0); 2495 match(Set dst (AddD src1 (LoadD src2))); 2496 2497 format %{ "vaddsd $dst, $src1, $src2" %} 2498 ins_cost(150); 2499 ins_encode %{ 2500 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2501 %} 2502 ins_pipe(pipe_slow); 2503 %} 2504 2505 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2506 predicate(UseAVX > 0); 2507 match(Set dst (AddD src con)); 2508 2509 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2510 ins_cost(150); 2511 ins_encode %{ 2512 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2513 %} 2514 ins_pipe(pipe_slow); 2515 %} 2516 2517 instruct subF_reg(regF dst, regF src) %{ 2518 predicate((UseSSE>=1) && (UseAVX == 0)); 2519 match(Set dst (SubF dst src)); 2520 2521 format %{ "subss $dst, $src" %} 2522 ins_cost(150); 2523 ins_encode %{ 2524 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2525 %} 2526 ins_pipe(pipe_slow); 2527 %} 2528 2529 instruct subF_mem(regF dst, memory src) %{ 2530 predicate((UseSSE>=1) && (UseAVX == 0)); 2531 match(Set dst (SubF dst (LoadF src))); 2532 2533 format %{ "subss $dst, $src" %} 2534 ins_cost(150); 2535 ins_encode %{ 2536 __ subss($dst$$XMMRegister, $src$$Address); 2537 %} 2538 ins_pipe(pipe_slow); 2539 %} 2540 2541 instruct subF_imm(regF dst, immF con) %{ 2542 predicate((UseSSE>=1) && (UseAVX == 0)); 2543 match(Set dst (SubF dst con)); 2544 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2545 ins_cost(150); 2546 ins_encode %{ 2547 __ subss($dst$$XMMRegister, $constantaddress($con)); 2548 %} 2549 ins_pipe(pipe_slow); 2550 %} 2551 2552 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2553 predicate(UseAVX > 0); 2554 match(Set dst (SubF src1 src2)); 2555 2556 format %{ "vsubss $dst, $src1, $src2" %} 2557 ins_cost(150); 2558 ins_encode %{ 2559 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2560 %} 2561 ins_pipe(pipe_slow); 2562 %} 2563 2564 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2565 predicate(UseAVX > 0); 2566 match(Set dst (SubF src1 (LoadF src2))); 2567 2568 format %{ "vsubss $dst, $src1, $src2" %} 2569 ins_cost(150); 2570 ins_encode %{ 2571 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2572 %} 2573 ins_pipe(pipe_slow); 2574 %} 2575 2576 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2577 predicate(UseAVX > 0); 2578 match(Set dst (SubF src con)); 2579 2580 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" 
%} 2581 ins_cost(150); 2582 ins_encode %{ 2583 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2584 %} 2585 ins_pipe(pipe_slow); 2586 %} 2587 2588 instruct subD_reg(regD dst, regD src) %{ 2589 predicate((UseSSE>=2) && (UseAVX == 0)); 2590 match(Set dst (SubD dst src)); 2591 2592 format %{ "subsd $dst, $src" %} 2593 ins_cost(150); 2594 ins_encode %{ 2595 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2596 %} 2597 ins_pipe(pipe_slow); 2598 %} 2599 2600 instruct subD_mem(regD dst, memory src) %{ 2601 predicate((UseSSE>=2) && (UseAVX == 0)); 2602 match(Set dst (SubD dst (LoadD src))); 2603 2604 format %{ "subsd $dst, $src" %} 2605 ins_cost(150); 2606 ins_encode %{ 2607 __ subsd($dst$$XMMRegister, $src$$Address); 2608 %} 2609 ins_pipe(pipe_slow); 2610 %} 2611 2612 instruct subD_imm(regD dst, immD con) %{ 2613 predicate((UseSSE>=2) && (UseAVX == 0)); 2614 match(Set dst (SubD dst con)); 2615 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2616 ins_cost(150); 2617 ins_encode %{ 2618 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2619 %} 2620 ins_pipe(pipe_slow); 2621 %} 2622 2623 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2624 predicate(UseAVX > 0); 2625 match(Set dst (SubD src1 src2)); 2626 2627 format %{ "vsubsd $dst, $src1, $src2" %} 2628 ins_cost(150); 2629 ins_encode %{ 2630 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2631 %} 2632 ins_pipe(pipe_slow); 2633 %} 2634 2635 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2636 predicate(UseAVX > 0); 2637 match(Set dst (SubD src1 (LoadD src2))); 2638 2639 format %{ "vsubsd $dst, $src1, $src2" %} 2640 ins_cost(150); 2641 ins_encode %{ 2642 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2643 %} 2644 ins_pipe(pipe_slow); 2645 %} 2646 2647 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2648 predicate(UseAVX > 0); 2649 match(Set dst (SubD src con)); 2650 2651 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2652 ins_cost(150); 2653 ins_encode %{ 2654 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2655 %} 2656 ins_pipe(pipe_slow); 2657 %} 2658 2659 instruct mulF_reg(regF dst, regF src) %{ 2660 predicate((UseSSE>=1) && (UseAVX == 0)); 2661 match(Set dst (MulF dst src)); 2662 2663 format %{ "mulss $dst, $src" %} 2664 ins_cost(150); 2665 ins_encode %{ 2666 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2667 %} 2668 ins_pipe(pipe_slow); 2669 %} 2670 2671 instruct mulF_mem(regF dst, memory src) %{ 2672 predicate((UseSSE>=1) && (UseAVX == 0)); 2673 match(Set dst (MulF dst (LoadF src))); 2674 2675 format %{ "mulss $dst, $src" %} 2676 ins_cost(150); 2677 ins_encode %{ 2678 __ mulss($dst$$XMMRegister, $src$$Address); 2679 %} 2680 ins_pipe(pipe_slow); 2681 %} 2682 2683 instruct mulF_imm(regF dst, immF con) %{ 2684 predicate((UseSSE>=1) && (UseAVX == 0)); 2685 match(Set dst (MulF dst con)); 2686 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2687 ins_cost(150); 2688 ins_encode %{ 2689 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2690 %} 2691 ins_pipe(pipe_slow); 2692 %} 2693 2694 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2695 predicate(UseAVX > 0); 2696 match(Set dst (MulF src1 src2)); 2697 2698 format %{ "vmulss $dst, $src1, $src2" %} 2699 ins_cost(150); 2700 ins_encode %{ 2701 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2702 %} 2703 ins_pipe(pipe_slow); 2704 %} 
2705 2706 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2707 predicate(UseAVX > 0); 2708 match(Set dst (MulF src1 (LoadF src2))); 2709 2710 format %{ "vmulss $dst, $src1, $src2" %} 2711 ins_cost(150); 2712 ins_encode %{ 2713 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2714 %} 2715 ins_pipe(pipe_slow); 2716 %} 2717 2718 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2719 predicate(UseAVX > 0); 2720 match(Set dst (MulF src con)); 2721 2722 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2723 ins_cost(150); 2724 ins_encode %{ 2725 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2726 %} 2727 ins_pipe(pipe_slow); 2728 %} 2729 2730 instruct mulD_reg(regD dst, regD src) %{ 2731 predicate((UseSSE>=2) && (UseAVX == 0)); 2732 match(Set dst (MulD dst src)); 2733 2734 format %{ "mulsd $dst, $src" %} 2735 ins_cost(150); 2736 ins_encode %{ 2737 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2738 %} 2739 ins_pipe(pipe_slow); 2740 %} 2741 2742 instruct mulD_mem(regD dst, memory src) %{ 2743 predicate((UseSSE>=2) && (UseAVX == 0)); 2744 match(Set dst (MulD dst (LoadD src))); 2745 2746 format %{ "mulsd $dst, $src" %} 2747 ins_cost(150); 2748 ins_encode %{ 2749 __ mulsd($dst$$XMMRegister, $src$$Address); 2750 %} 2751 ins_pipe(pipe_slow); 2752 %} 2753 2754 instruct mulD_imm(regD dst, immD con) %{ 2755 predicate((UseSSE>=2) && (UseAVX == 0)); 2756 match(Set dst (MulD dst con)); 2757 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2758 ins_cost(150); 2759 ins_encode %{ 2760 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2761 %} 2762 ins_pipe(pipe_slow); 2763 %} 2764 2765 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2766 predicate(UseAVX > 0); 2767 match(Set dst (MulD src1 src2)); 2768 2769 format %{ "vmulsd $dst, $src1, $src2" %} 2770 ins_cost(150); 2771 ins_encode %{ 2772 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2773 %} 2774 ins_pipe(pipe_slow); 2775 %} 2776 2777 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2778 predicate(UseAVX > 0); 2779 match(Set dst (MulD src1 (LoadD src2))); 2780 2781 format %{ "vmulsd $dst, $src1, $src2" %} 2782 ins_cost(150); 2783 ins_encode %{ 2784 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2785 %} 2786 ins_pipe(pipe_slow); 2787 %} 2788 2789 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2790 predicate(UseAVX > 0); 2791 match(Set dst (MulD src con)); 2792 2793 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2794 ins_cost(150); 2795 ins_encode %{ 2796 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2797 %} 2798 ins_pipe(pipe_slow); 2799 %} 2800 2801 instruct divF_reg(regF dst, regF src) %{ 2802 predicate((UseSSE>=1) && (UseAVX == 0)); 2803 match(Set dst (DivF dst src)); 2804 2805 format %{ "divss $dst, $src" %} 2806 ins_cost(150); 2807 ins_encode %{ 2808 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2809 %} 2810 ins_pipe(pipe_slow); 2811 %} 2812 2813 instruct divF_mem(regF dst, memory src) %{ 2814 predicate((UseSSE>=1) && (UseAVX == 0)); 2815 match(Set dst (DivF dst (LoadF src))); 2816 2817 format %{ "divss $dst, $src" %} 2818 ins_cost(150); 2819 ins_encode %{ 2820 __ divss($dst$$XMMRegister, $src$$Address); 2821 %} 2822 ins_pipe(pipe_slow); 2823 %} 2824 2825 instruct divF_imm(regF dst, immF con) %{ 2826 predicate((UseSSE>=1) && (UseAVX == 0)); 2827 match(Set dst (DivF dst 
con)); 2828 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2829 ins_cost(150); 2830 ins_encode %{ 2831 __ divss($dst$$XMMRegister, $constantaddress($con)); 2832 %} 2833 ins_pipe(pipe_slow); 2834 %} 2835 2836 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2837 predicate(UseAVX > 0); 2838 match(Set dst (DivF src1 src2)); 2839 2840 format %{ "vdivss $dst, $src1, $src2" %} 2841 ins_cost(150); 2842 ins_encode %{ 2843 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2844 %} 2845 ins_pipe(pipe_slow); 2846 %} 2847 2848 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2849 predicate(UseAVX > 0); 2850 match(Set dst (DivF src1 (LoadF src2))); 2851 2852 format %{ "vdivss $dst, $src1, $src2" %} 2853 ins_cost(150); 2854 ins_encode %{ 2855 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2856 %} 2857 ins_pipe(pipe_slow); 2858 %} 2859 2860 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2861 predicate(UseAVX > 0); 2862 match(Set dst (DivF src con)); 2863 2864 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2865 ins_cost(150); 2866 ins_encode %{ 2867 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2868 %} 2869 ins_pipe(pipe_slow); 2870 %} 2871 2872 instruct divD_reg(regD dst, regD src) %{ 2873 predicate((UseSSE>=2) && (UseAVX == 0)); 2874 match(Set dst (DivD dst src)); 2875 2876 format %{ "divsd $dst, $src" %} 2877 ins_cost(150); 2878 ins_encode %{ 2879 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2880 %} 2881 ins_pipe(pipe_slow); 2882 %} 2883 2884 instruct divD_mem(regD dst, memory src) %{ 2885 predicate((UseSSE>=2) && (UseAVX == 0)); 2886 match(Set dst (DivD dst (LoadD src))); 2887 2888 format %{ "divsd $dst, $src" %} 2889 ins_cost(150); 2890 ins_encode %{ 2891 __ divsd($dst$$XMMRegister, $src$$Address); 2892 %} 2893 ins_pipe(pipe_slow); 2894 %} 2895 2896 instruct divD_imm(regD dst, immD con) %{ 2897 predicate((UseSSE>=2) && (UseAVX == 0)); 2898 match(Set dst (DivD dst con)); 2899 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2900 ins_cost(150); 2901 ins_encode %{ 2902 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2903 %} 2904 ins_pipe(pipe_slow); 2905 %} 2906 2907 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2908 predicate(UseAVX > 0); 2909 match(Set dst (DivD src1 src2)); 2910 2911 format %{ "vdivsd $dst, $src1, $src2" %} 2912 ins_cost(150); 2913 ins_encode %{ 2914 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2915 %} 2916 ins_pipe(pipe_slow); 2917 %} 2918 2919 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2920 predicate(UseAVX > 0); 2921 match(Set dst (DivD src1 (LoadD src2))); 2922 2923 format %{ "vdivsd $dst, $src1, $src2" %} 2924 ins_cost(150); 2925 ins_encode %{ 2926 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2927 %} 2928 ins_pipe(pipe_slow); 2929 %} 2930 2931 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2932 predicate(UseAVX > 0); 2933 match(Set dst (DivD src con)); 2934 2935 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2936 ins_cost(150); 2937 ins_encode %{ 2938 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2939 %} 2940 ins_pipe(pipe_slow); 2941 %} 2942 2943 instruct absF_reg(regF dst) %{ 2944 predicate((UseSSE>=1) && (UseAVX == 0)); 2945 match(Set dst (AbsF dst)); 2946 ins_cost(150); 2947 format %{ "andps $dst, 
[0x7fffffff]\t# abs float by sign masking" %} 2948 ins_encode %{ 2949 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2950 %} 2951 ins_pipe(pipe_slow); 2952 %} 2953 2954 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2955 predicate(UseAVX > 0); 2956 match(Set dst (AbsF src)); 2957 ins_cost(150); 2958 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2959 ins_encode %{ 2960 int vector_len = 0; 2961 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2962 ExternalAddress(float_signmask()), vector_len); 2963 %} 2964 ins_pipe(pipe_slow); 2965 %} 2966 2967 instruct absD_reg(regD dst) %{ 2968 predicate((UseSSE>=2) && (UseAVX == 0)); 2969 match(Set dst (AbsD dst)); 2970 ins_cost(150); 2971 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2972 "# abs double by sign masking" %} 2973 ins_encode %{ 2974 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2975 %} 2976 ins_pipe(pipe_slow); 2977 %} 2978 2979 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2980 predicate(UseAVX > 0); 2981 match(Set dst (AbsD src)); 2982 ins_cost(150); 2983 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2984 "# abs double by sign masking" %} 2985 ins_encode %{ 2986 int vector_len = 0; 2987 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2988 ExternalAddress(double_signmask()), vector_len); 2989 %} 2990 ins_pipe(pipe_slow); 2991 %} 2992 2993 instruct negF_reg(regF dst) %{ 2994 predicate((UseSSE>=1) && (UseAVX == 0)); 2995 match(Set dst (NegF dst)); 2996 ins_cost(150); 2997 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2998 ins_encode %{ 2999 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3000 %} 3001 ins_pipe(pipe_slow); 3002 %} 3003 3004 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3005 predicate(UseAVX > 0); 3006 match(Set dst (NegF src)); 3007 ins_cost(150); 3008 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3009 ins_encode %{ 3010 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3011 ExternalAddress(float_signflip())); 3012 %} 3013 ins_pipe(pipe_slow); 3014 %} 3015 3016 instruct negD_reg(regD dst) %{ 3017 predicate((UseSSE>=2) && (UseAVX == 0)); 3018 match(Set dst (NegD dst)); 3019 ins_cost(150); 3020 format %{ "xorpd $dst, [0x8000000000000000]\t" 3021 "# neg double by sign flipping" %} 3022 ins_encode %{ 3023 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3024 %} 3025 ins_pipe(pipe_slow); 3026 %} 3027 3028 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3029 predicate(UseAVX > 0); 3030 match(Set dst (NegD src)); 3031 ins_cost(150); 3032 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3033 "# neg double by sign flipping" %} 3034 ins_encode %{ 3035 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3036 ExternalAddress(double_signflip())); 3037 %} 3038 ins_pipe(pipe_slow); 3039 %} 3040 3041 instruct sqrtF_reg(regF dst, regF src) %{ 3042 predicate(UseSSE>=1); 3043 match(Set dst (SqrtF src)); 3044 3045 format %{ "sqrtss $dst, $src" %} 3046 ins_cost(150); 3047 ins_encode %{ 3048 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 3049 %} 3050 ins_pipe(pipe_slow); 3051 %} 3052 3053 instruct sqrtF_mem(regF dst, memory src) %{ 3054 predicate(UseSSE>=1); 3055 match(Set dst (SqrtF (LoadF src))); 3056 3057 format %{ "sqrtss $dst, $src" %} 3058 ins_cost(150); 3059 ins_encode %{ 3060 __ sqrtss($dst$$XMMRegister, $src$$Address); 3061 %} 3062 ins_pipe(pipe_slow); 3063 %} 3064 3065 instruct sqrtF_imm(regF dst, immF con) %{ 3066 predicate(UseSSE>=1); 3067 match(Set dst 
(SqrtF con)); 3068 3069 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3070 ins_cost(150); 3071 ins_encode %{ 3072 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 3073 %} 3074 ins_pipe(pipe_slow); 3075 %} 3076 3077 instruct sqrtD_reg(regD dst, regD src) %{ 3078 predicate(UseSSE>=2); 3079 match(Set dst (SqrtD src)); 3080 3081 format %{ "sqrtsd $dst, $src" %} 3082 ins_cost(150); 3083 ins_encode %{ 3084 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 3085 %} 3086 ins_pipe(pipe_slow); 3087 %} 3088 3089 instruct sqrtD_mem(regD dst, memory src) %{ 3090 predicate(UseSSE>=2); 3091 match(Set dst (SqrtD (LoadD src))); 3092 3093 format %{ "sqrtsd $dst, $src" %} 3094 ins_cost(150); 3095 ins_encode %{ 3096 __ sqrtsd($dst$$XMMRegister, $src$$Address); 3097 %} 3098 ins_pipe(pipe_slow); 3099 %} 3100 3101 instruct sqrtD_imm(regD dst, immD con) %{ 3102 predicate(UseSSE>=2); 3103 match(Set dst (SqrtD con)); 3104 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3105 ins_cost(150); 3106 ins_encode %{ 3107 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 3108 %} 3109 ins_pipe(pipe_slow); 3110 %} 3111 3112 3113 #ifdef _LP64 3114 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3115 match(Set dst (RoundDoubleMode src rmode)); 3116 format %{ "roundsd $dst,$src" %} 3117 ins_cost(150); 3118 ins_encode %{ 3119 assert(UseSSE >= 4, "required"); 3120 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3121 %} 3122 ins_pipe(pipe_slow); 3123 %} 3124 3125 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3126 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3127 format %{ "roundsd $dst,$src" %} 3128 ins_cost(150); 3129 ins_encode %{ 3130 assert(UseSSE >= 4, "required"); 3131 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3132 %} 3133 ins_pipe(pipe_slow); 3134 %} 3135 3136 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 3137 match(Set dst (RoundDoubleMode con rmode)); 3138 effect(TEMP scratch_reg); 3139 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3140 ins_cost(150); 3141 ins_encode %{ 3142 assert(UseSSE >= 4, "required"); 3143 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 3144 %} 3145 ins_pipe(pipe_slow); 3146 %} 3147 3148 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3149 predicate(n->as_Vector()->length() < 8); 3150 match(Set dst (RoundDoubleModeV src rmode)); 3151 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3152 ins_encode %{ 3153 assert(UseAVX > 0, "required"); 3154 int vector_len = vector_length_encoding(this); 3155 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len); 3156 %} 3157 ins_pipe( pipe_slow ); 3158 %} 3159 3160 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3161 predicate(n->as_Vector()->length() == 8); 3162 match(Set dst (RoundDoubleModeV src rmode)); 3163 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3164 ins_encode %{ 3165 assert(UseAVX > 2, "required"); 3166 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3167 %} 3168 ins_pipe( pipe_slow ); 3169 %} 3170 3171 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3172 predicate(n->as_Vector()->length() < 8); 3173 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3174 format %{ "vroundpd $dst, $mem, $rmode\t! 
round packedD" %} 3175 ins_encode %{ 3176 assert(UseAVX > 0, "required"); 3177 int vector_len = vector_length_encoding(this); 3178 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len); 3179 %} 3180 ins_pipe( pipe_slow ); 3181 %} 3182 3183 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3184 predicate(n->as_Vector()->length() == 8); 3185 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3186 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3187 ins_encode %{ 3188 assert(UseAVX > 2, "required"); 3189 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3190 %} 3191 ins_pipe( pipe_slow ); 3192 %} 3193 #endif // _LP64 3194 3195 instruct onspinwait() %{ 3196 match(OnSpinWait); 3197 ins_cost(200); 3198 3199 format %{ 3200 $$template 3201 $$emit$$"pause\t! membar_onspinwait" 3202 %} 3203 ins_encode %{ 3204 __ pause(); 3205 %} 3206 ins_pipe(pipe_slow); 3207 %} 3208 3209 // a * b + c 3210 instruct fmaD_reg(regD a, regD b, regD c) %{ 3211 predicate(UseFMA); 3212 match(Set c (FmaD c (Binary a b))); 3213 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3214 ins_cost(150); 3215 ins_encode %{ 3216 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3217 %} 3218 ins_pipe( pipe_slow ); 3219 %} 3220 3221 // a * b + c 3222 instruct fmaF_reg(regF a, regF b, regF c) %{ 3223 predicate(UseFMA); 3224 match(Set c (FmaF c (Binary a b))); 3225 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3226 ins_cost(150); 3227 ins_encode %{ 3228 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3229 %} 3230 ins_pipe( pipe_slow ); 3231 %} 3232 3233 // ====================VECTOR INSTRUCTIONS===================================== 3234 3235 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3236 instruct MoveVec2Leg(legVec dst, vec src) %{ 3237 match(Set dst src); 3238 format %{ "" %} 3239 ins_encode %{ 3240 ShouldNotReachHere(); 3241 %} 3242 ins_pipe( fpu_reg_reg ); 3243 %} 3244 3245 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3246 match(Set dst src); 3247 format %{ "" %} 3248 ins_encode %{ 3249 ShouldNotReachHere(); 3250 %} 3251 ins_pipe( fpu_reg_reg ); 3252 %} 3253 3254 // ============================================================================ 3255 3256 // Load vectors 3257 instruct loadV(vec dst, memory mem) %{ 3258 match(Set dst (LoadVector mem)); 3259 ins_cost(125); 3260 format %{ "load_vector $dst,$mem" %} 3261 ins_encode %{ 3262 switch (vector_length_in_bytes(this)) { 3263 case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break; 3264 case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break; 3265 case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break; 3266 case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break; 3267 case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break; 3268 default: ShouldNotReachHere(); 3269 } 3270 %} 3271 ins_pipe( pipe_slow ); 3272 %} 3273 3274 // Store vectors generic operand pattern. 
3275 instruct storeV(memory mem, vec src) %{ 3276 match(Set mem (StoreVector mem src)); 3277 ins_cost(145); 3278 format %{ "store_vector $mem,$src\n\t" %} 3279 ins_encode %{ 3280 switch (vector_length_in_bytes(this, $src)) { 3281 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3282 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3283 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3284 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3285 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3286 default: ShouldNotReachHere(); 3287 } 3288 %} 3289 ins_pipe( pipe_slow ); 3290 %} 3291 3292 // ====================REPLICATE======================================= 3293 3294 // Replicate byte scalar to be vector 3295 instruct ReplB_reg(vec dst, rRegI src) %{ 3296 match(Set dst (ReplicateB src)); 3297 format %{ "replicateB $dst,$src" %} 3298 ins_encode %{ 3299 uint vlen = vector_length(this); 3300 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3301 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 3302 int vlen_enc = vector_length_encoding(this); 3303 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 3304 } else { 3305 __ movdl($dst$$XMMRegister, $src$$Register); 3306 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3307 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3308 if (vlen >= 16) { 3309 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3310 if (vlen >= 32) { 3311 assert(vlen == 32, "sanity"); 3312 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3313 } 3314 } 3315 } 3316 %} 3317 ins_pipe( pipe_slow ); 3318 %} 3319 3320 instruct ReplB_mem(vec dst, memory mem) %{ 3321 predicate(VM_Version::supports_avx2()); 3322 match(Set dst (ReplicateB (LoadB mem))); 3323 format %{ "replicateB $dst,$mem" %} 3324 ins_encode %{ 3325 int vector_len = vector_length_encoding(this); 3326 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3327 %} 3328 ins_pipe( pipe_slow ); 3329 %} 3330 3331 instruct ReplB_imm(vec dst, immI con) %{ 3332 match(Set dst (ReplicateB con)); 3333 format %{ "replicateB $dst,$con" %} 3334 ins_encode %{ 3335 uint vlen = vector_length(this); 3336 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1)); 3337 if (vlen == 4) { 3338 __ movdl($dst$$XMMRegister, const_addr); 3339 } else { 3340 __ movq($dst$$XMMRegister, const_addr); 3341 if (vlen >= 16) { 3342 if (VM_Version::supports_avx2()) { 3343 int vlen_enc = vector_length_encoding(this); 3344 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3345 } else { 3346 assert(vlen == 16, "sanity"); 3347 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3348 } 3349 } 3350 } 3351 %} 3352 ins_pipe( pipe_slow ); 3353 %} 3354 3355 // Replicate byte scalar zero to be vector 3356 instruct ReplB_zero(vec dst, immI0 zero) %{ 3357 match(Set dst (ReplicateB zero)); 3358 format %{ "replicateB $dst,$zero" %} 3359 ins_encode %{ 3360 uint vlen = vector_length(this); 3361 if (vlen <= 16) { 3362 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3363 } else { 3364 // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ). 
3365 int vlen_enc = vector_length_encoding(this); 3366 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3367 } 3368 %} 3369 ins_pipe( fpu_reg_reg ); 3370 %} 3371 3372 // ====================ReplicateS======================================= 3373 3374 instruct ReplS_reg(vec dst, rRegI src) %{ 3375 match(Set dst (ReplicateS src)); 3376 format %{ "replicateS $dst,$src" %} 3377 ins_encode %{ 3378 uint vlen = vector_length(this); 3379 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3380 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 3381 int vlen_enc = vector_length_encoding(this); 3382 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 3383 } else { 3384 __ movdl($dst$$XMMRegister, $src$$Register); 3385 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3386 if (vlen >= 8) { 3387 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3388 if (vlen >= 16) { 3389 assert(vlen == 16, "sanity"); 3390 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3391 } 3392 } 3393 } 3394 %} 3395 ins_pipe( pipe_slow ); 3396 %} 3397 3398 instruct ReplS_mem(vec dst, memory mem) %{ 3399 predicate(VM_Version::supports_avx2()); 3400 match(Set dst (ReplicateS (LoadS mem))); 3401 format %{ "replicateS $dst,$mem" %} 3402 ins_encode %{ 3403 int vlen_enc = vector_length_encoding(this); 3404 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 3405 %} 3406 ins_pipe( pipe_slow ); 3407 %} 3408 3409 instruct ReplS_imm(vec dst, immI con) %{ 3410 match(Set dst (ReplicateS con)); 3411 format %{ "replicateS $dst,$con" %} 3412 ins_encode %{ 3413 uint vlen = vector_length(this); 3414 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2)); 3415 if (vlen == 2) { 3416 __ movdl($dst$$XMMRegister, const_addr); 3417 } else { 3418 __ movq($dst$$XMMRegister, const_addr); 3419 if (vlen >= 8) { 3420 if (VM_Version::supports_avx2()) { 3421 int vlen_enc = vector_length_encoding(this); 3422 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3423 } else { 3424 assert(vlen == 8, "sanity"); 3425 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3426 } 3427 } 3428 } 3429 %} 3430 ins_pipe( fpu_reg_reg ); 3431 %} 3432 3433 instruct ReplS_zero(vec dst, immI0 zero) %{ 3434 match(Set dst (ReplicateS zero)); 3435 format %{ "replicateS $dst,$zero" %} 3436 ins_encode %{ 3437 uint vlen = vector_length(this); 3438 if (vlen <= 8) { 3439 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3440 } else { 3441 int vlen_enc = vector_length_encoding(this); 3442 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3443 } 3444 %} 3445 ins_pipe( fpu_reg_reg ); 3446 %} 3447 3448 // ====================ReplicateI======================================= 3449 3450 instruct ReplI_reg(vec dst, rRegI src) %{ 3451 match(Set dst (ReplicateI src)); 3452 format %{ "replicateI $dst,$src" %} 3453 ins_encode %{ 3454 uint vlen = vector_length(this); 3455 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3456 int vlen_enc = vector_length_encoding(this); 3457 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 3458 } else { 3459 __ movdl($dst$$XMMRegister, $src$$Register); 3460 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3461 if (vlen >= 8) { 3462 assert(vlen == 8, "sanity"); 3463 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3464 } 3465 } 3466 %} 3467 ins_pipe( pipe_slow ); 3468 %} 3469 3470 instruct 
ReplI_mem(vec dst, memory mem) %{ 3471 match(Set dst (ReplicateI (LoadI mem))); 3472 format %{ "replicateI $dst,$mem" %} 3473 ins_encode %{ 3474 uint vlen = vector_length(this); 3475 if (vlen <= 4) { 3476 __ movdl($dst$$XMMRegister, $mem$$Address); 3477 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3478 } else { 3479 assert(VM_Version::supports_avx2(), "sanity"); 3480 int vector_len = vector_length_encoding(this); 3481 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 3482 } 3483 %} 3484 ins_pipe( pipe_slow ); 3485 %} 3486 3487 instruct ReplI_imm(vec dst, immI con) %{ 3488 match(Set dst (ReplicateI con)); 3489 format %{ "replicateI $dst,$con" %} 3490 ins_encode %{ 3491 uint vlen = vector_length(this); 3492 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4)); 3493 if (vlen <= 4) { 3494 __ movq($dst$$XMMRegister, const_addr); 3495 if (vlen == 4) { 3496 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3497 } 3498 } else { 3499 assert(VM_Version::supports_avx2(), "sanity"); 3500 int vector_len = vector_length_encoding(this); 3501 __ movq($dst$$XMMRegister, const_addr); 3502 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3503 } 3504 %} 3505 ins_pipe( pipe_slow ); 3506 %} 3507 3508 // Replicate integer (4 byte) scalar zero to be vector 3509 instruct ReplI_zero(vec dst, immI0 zero) %{ 3510 match(Set dst (ReplicateI zero)); 3511 format %{ "replicateI $dst,$zero" %} 3512 ins_encode %{ 3513 uint vlen = vector_length(this); 3514 if (vlen <= 4) { 3515 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3516 } else { 3517 int vlen_enc = vector_length_encoding(this); 3518 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3519 } 3520 %} 3521 ins_pipe( fpu_reg_reg ); 3522 %} 3523 3524 instruct ReplI_M1(vec dst, immI_M1 con) %{ 3525 predicate(UseAVX > 0); 3526 match(Set dst (ReplicateB con)); 3527 match(Set dst (ReplicateS con)); 3528 match(Set dst (ReplicateI con)); 3529 effect(TEMP dst); 3530 format %{ "vallones $dst" %} 3531 ins_encode %{ 3532 int vector_len = vector_length_encoding(this); 3533 __ vallones($dst$$XMMRegister, vector_len); 3534 %} 3535 ins_pipe( pipe_slow ); 3536 %} 3537 3538 // ====================ReplicateL======================================= 3539 3540 #ifdef _LP64 3541 // Replicate long (8 byte) scalar to be vector 3542 instruct ReplL_reg(vec dst, rRegL src) %{ 3543 match(Set dst (ReplicateL src)); 3544 format %{ "replicateL $dst,$src" %} 3545 ins_encode %{ 3546 uint vlen = vector_length(this); 3547 if (vlen == 2) { 3548 __ movdq($dst$$XMMRegister, $src$$Register); 3549 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3550 } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3551 int vlen_enc = vector_length_encoding(this); 3552 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 3553 } else { 3554 assert(vlen == 4, "sanity"); 3555 __ movdq($dst$$XMMRegister, $src$$Register); 3556 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3557 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3558 } 3559 %} 3560 ins_pipe( pipe_slow ); 3561 %} 3562 #else // _LP64 3563 // Replicate long (8 byte) scalar to be vector 3564 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 3565 predicate(n->as_Vector()->length() <= 4); 3566 match(Set dst (ReplicateL src)); 3567 effect(TEMP dst, USE src, TEMP tmp); 3568 format %{ "replicateL $dst,$src" %} 3569 ins_encode %{ 3570 uint vlen = vector_length(this); 3571 if (vlen == 2) { 3572 __ 
movdl($dst$$XMMRegister, $src$$Register); 3573 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3574 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3575 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3576 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3577 int vector_len = Assembler::AVX_256bit; 3578 __ movdl($dst$$XMMRegister, $src$$Register); 3579 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3580 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3581 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3582 } else { 3583 __ movdl($dst$$XMMRegister, $src$$Register); 3584 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3585 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3586 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3587 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3588 } 3589 %} 3590 ins_pipe( pipe_slow ); 3591 %} 3592 3593 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 3594 predicate(n->as_Vector()->length() == 8); 3595 match(Set dst (ReplicateL src)); 3596 effect(TEMP dst, USE src, TEMP tmp); 3597 format %{ "replicateL $dst,$src" %} 3598 ins_encode %{ 3599 if (VM_Version::supports_avx512vl()) { 3600 __ movdl($dst$$XMMRegister, $src$$Register); 3601 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3602 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3603 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3604 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3605 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3606 } else { 3607 int vector_len = Assembler::AVX_512bit; 3608 __ movdl($dst$$XMMRegister, $src$$Register); 3609 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3610 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3611 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3612 } 3613 %} 3614 ins_pipe( pipe_slow ); 3615 %} 3616 #endif // _LP64 3617 3618 instruct ReplL_mem(vec dst, memory mem) %{ 3619 match(Set dst (ReplicateL (LoadL mem))); 3620 format %{ "replicateL $dst,$mem" %} 3621 ins_encode %{ 3622 uint vlen = vector_length(this); 3623 if (vlen == 2) { 3624 __ movq($dst$$XMMRegister, $mem$$Address); 3625 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3626 } else { 3627 assert(VM_Version::supports_avx2(), "sanity"); 3628 int vlen_enc = vector_length_encoding(this); 3629 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 3630 } 3631 %} 3632 ins_pipe( pipe_slow ); 3633 %} 3634 3635 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
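// For a 2-element vector the constant is loaded with movq and duplicated with
// punpcklqdq; longer vectors load it once and broadcast it with vpbroadcastq,
// which requires AVX2 (see the assert below).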
3636 instruct ReplL_imm(vec dst, immL con) %{ 3637 match(Set dst (ReplicateL con)); 3638 format %{ "replicateL $dst,$con" %} 3639 ins_encode %{ 3640 uint vlen = vector_length(this); 3641 InternalAddress const_addr = $constantaddress($con); 3642 if (vlen == 2) { 3643 __ movq($dst$$XMMRegister, const_addr); 3644 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3645 } else { 3646 assert(VM_Version::supports_avx2(), "sanity"); 3647 int vlen_enc = vector_length_encoding(this); 3648 __ movq($dst$$XMMRegister, const_addr); 3649 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3650 } 3651 %} 3652 ins_pipe( pipe_slow ); 3653 %} 3654 3655 instruct ReplL_zero(vec dst, immL0 zero) %{ 3656 match(Set dst (ReplicateL zero)); 3657 format %{ "replicateL $dst,$zero" %} 3658 ins_encode %{ 3659 int vlen = vector_length(this); 3660 if (vlen == 2) { 3661 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3662 } else { 3663 int vlen_enc = vector_length_encoding(this); 3664 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3665 } 3666 %} 3667 ins_pipe( fpu_reg_reg ); 3668 %} 3669 3670 instruct ReplL_M1(vec dst, immL_M1 con) %{ 3671 predicate(UseAVX > 0); 3672 match(Set dst (ReplicateL con)); 3673 effect(TEMP dst); 3674 format %{ "vallones $dst" %} 3675 ins_encode %{ 3676 int vector_len = vector_length_encoding(this); 3677 __ vallones($dst$$XMMRegister, vector_len); 3678 %} 3679 ins_pipe( pipe_slow ); 3680 %} 3681 3682 // ====================ReplicateF======================================= 3683 3684 instruct ReplF_reg(vec dst, vlRegF src) %{ 3685 match(Set dst (ReplicateF src)); 3686 format %{ "replicateF $dst,$src" %} 3687 ins_encode %{ 3688 uint vlen = vector_length(this); 3689 if (vlen <= 4) { 3690 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3691 } else if (VM_Version::supports_avx2()) { 3692 int vector_len = vector_length_encoding(this); 3693 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2 3694 } else { 3695 assert(vlen == 8, "sanity"); 3696 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3697 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3698 } 3699 %} 3700 ins_pipe( pipe_slow ); 3701 %} 3702 3703 instruct ReplF_mem(vec dst, memory mem) %{ 3704 match(Set dst (ReplicateF (LoadF mem))); 3705 format %{ "replicateF $dst,$mem" %} 3706 ins_encode %{ 3707 uint vlen = vector_length(this); 3708 if (vlen <= 4) { 3709 __ movdl($dst$$XMMRegister, $mem$$Address); 3710 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3711 } else { 3712 assert(VM_Version::supports_avx(), "sanity"); 3713 int vector_len = vector_length_encoding(this); 3714 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 3715 } 3716 %} 3717 ins_pipe( pipe_slow ); 3718 %} 3719 3720 instruct ReplF_zero(vec dst, immF0 zero) %{ 3721 match(Set dst (ReplicateF zero)); 3722 format %{ "replicateF $dst,$zero" %} 3723 ins_encode %{ 3724 uint vlen = vector_length(this); 3725 if (vlen <= 4) { 3726 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3727 } else { 3728 int vlen_enc = vector_length_encoding(this); 3729 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 3730 } 3731 %} 3732 ins_pipe( fpu_reg_reg ); 3733 %} 3734 3735 // ====================ReplicateD======================================= 3736 3737 // Replicate double (8 bytes) scalar to be vector 3738 instruct ReplD_reg(vec dst, vlRegD src) %{ 3739 match(Set dst (ReplicateD src)); 3740 format %{ "replicateD 
$dst,$src" %} 3741 ins_encode %{ 3742 uint vlen = vector_length(this); 3743 if (vlen == 2) { 3744 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3745 } else if (VM_Version::supports_avx2()) { 3746 int vector_len = vector_length_encoding(this); 3747 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2 3748 } else { 3749 assert(vlen == 4, "sanity"); 3750 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3751 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3752 } 3753 %} 3754 ins_pipe( pipe_slow ); 3755 %} 3756 3757 instruct ReplD_mem(vec dst, memory mem) %{ 3758 match(Set dst (ReplicateD (LoadD mem))); 3759 format %{ "replicateD $dst,$mem" %} 3760 ins_encode %{ 3761 uint vlen = vector_length(this); 3762 if (vlen == 2) { 3763 __ movq($dst$$XMMRegister, $mem$$Address); 3764 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); 3765 } else { 3766 assert(VM_Version::supports_avx(), "sanity"); 3767 int vector_len = vector_length_encoding(this); 3768 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 3769 } 3770 %} 3771 ins_pipe( pipe_slow ); 3772 %} 3773 3774 instruct ReplD_zero(vec dst, immD0 zero) %{ 3775 match(Set dst (ReplicateD zero)); 3776 format %{ "replicateD $dst,$zero" %} 3777 ins_encode %{ 3778 uint vlen = vector_length(this); 3779 if (vlen == 2) { 3780 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3781 } else { 3782 int vlen_enc = vector_length_encoding(this); 3783 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 3784 } 3785 %} 3786 ins_pipe( fpu_reg_reg ); 3787 %} 3788 3789 // ====================REDUCTION ARITHMETIC======================================= 3790 // =======================Int Reduction========================================== 3791 3792 instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 3793 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT && 3794 n->in(2)->bottom_type()->is_vect()->length() < 16); 3795 match(Set dst (AddReductionVI src1 src2)); 3796 match(Set dst (MulReductionVI src1 src2)); 3797 match(Set dst (AndReductionV src1 src2)); 3798 match(Set dst ( OrReductionV src1 src2)); 3799 match(Set dst (XorReductionV src1 src2)); 3800 effect(TEMP vtmp1, TEMP vtmp2); 3801 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3802 ins_encode %{ 3803 int opcode = this->ideal_Opcode(); 3804 int vlen = vector_length(this, $src2); 3805 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3806 %} 3807 ins_pipe( pipe_slow ); 3808 %} 3809 3810 instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 3811 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT && 3812 n->in(2)->bottom_type()->is_vect()->length() == 16); 3813 match(Set dst (AddReductionVI src1 src2)); 3814 match(Set dst (MulReductionVI src1 src2)); 3815 match(Set dst (AndReductionV src1 src2)); 3816 match(Set dst ( OrReductionV src1 src2)); 3817 match(Set dst (XorReductionV src1 src2)); 3818 effect(TEMP vtmp1, TEMP vtmp2); 3819 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3820 ins_encode %{ 3821 int opcode = this->ideal_Opcode(); 3822 int vlen = vector_length(this, $src2); 3823 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3824 %} 3825 ins_pipe( 
pipe_slow ); 3826 %} 3827 3828 // =======================Long Reduction========================================== 3829 3830 #ifdef _LP64 3831 instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 3832 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && 3833 n->in(2)->bottom_type()->is_vect()->length() < 8); 3834 match(Set dst (AddReductionVL src1 src2)); 3835 match(Set dst (MulReductionVL src1 src2)); 3836 match(Set dst (AndReductionV src1 src2)); 3837 match(Set dst ( OrReductionV src1 src2)); 3838 match(Set dst (XorReductionV src1 src2)); 3839 effect(TEMP vtmp1, TEMP vtmp2); 3840 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3841 ins_encode %{ 3842 int opcode = this->ideal_Opcode(); 3843 int vlen = vector_length(this, $src2); 3844 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3845 %} 3846 ins_pipe( pipe_slow ); 3847 %} 3848 3849 instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 3850 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && 3851 n->in(2)->bottom_type()->is_vect()->length() == 8); 3852 match(Set dst (AddReductionVL src1 src2)); 3853 match(Set dst (MulReductionVL src1 src2)); 3854 match(Set dst (AndReductionV src1 src2)); 3855 match(Set dst ( OrReductionV src1 src2)); 3856 match(Set dst (XorReductionV src1 src2)); 3857 effect(TEMP vtmp1, TEMP vtmp2); 3858 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3859 ins_encode %{ 3860 int opcode = this->ideal_Opcode(); 3861 int vlen = vector_length(this, $src2); 3862 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3863 %} 3864 ins_pipe( pipe_slow ); 3865 %} 3866 #endif // _LP64 3867 3868 // =======================Float Reduction========================================== 3869 3870 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 3871 predicate(n->in(2)->bottom_type()->is_vect()->length() <= 4); 3872 match(Set dst (AddReductionVF dst src)); 3873 match(Set dst (MulReductionVF dst src)); 3874 effect(TEMP dst, TEMP vtmp); 3875 format %{ "vector_reduction_fp $dst,$src ; using $vtmp as TEMP" %} 3876 ins_encode %{ 3877 int opcode = this->ideal_Opcode(); 3878 int vlen = vector_length(this, $src); 3879 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 3880 %} 3881 ins_pipe( pipe_slow ); 3882 %} 3883 3884 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 3885 predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); 3886 match(Set dst (AddReductionVF dst src)); 3887 match(Set dst (MulReductionVF dst src)); 3888 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3889 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3890 ins_encode %{ 3891 int opcode = this->ideal_Opcode(); 3892 int vlen = vector_length(this, $src); 3893 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3894 %} 3895 ins_pipe( pipe_slow ); 3896 %} 3897 3898 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 3899 predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); 3900 match(Set dst (AddReductionVF dst src)); 3901 match(Set dst (MulReductionVF dst src)); 3902 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3903 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" 
%} 3904 ins_encode %{ 3905 int opcode = this->ideal_Opcode(); 3906 int vlen = vector_length(this, $src); 3907 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3908 %} 3909 ins_pipe( pipe_slow ); 3910 %} 3911 3912 // =======================Double Reduction========================================== 3913 3914 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 3915 predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); 3916 match(Set dst (AddReductionVD dst src)); 3917 match(Set dst (MulReductionVD dst src)); 3918 effect(TEMP dst, TEMP vtmp); 3919 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 3920 ins_encode %{ 3921 int opcode = this->ideal_Opcode(); 3922 int vlen = vector_length(this, $src); 3923 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 3924 %} 3925 ins_pipe( pipe_slow ); 3926 %} 3927 3928 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 3929 predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); 3930 match(Set dst (AddReductionVD dst src)); 3931 match(Set dst (MulReductionVD dst src)); 3932 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3933 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3934 ins_encode %{ 3935 int opcode = this->ideal_Opcode(); 3936 int vlen = vector_length(this, $src); 3937 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3938 %} 3939 ins_pipe( pipe_slow ); 3940 %} 3941 3942 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 3943 predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); 3944 match(Set dst (AddReductionVD dst src)); 3945 match(Set dst (MulReductionVD dst src)); 3946 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3947 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3948 ins_encode %{ 3949 int opcode = this->ideal_Opcode(); 3950 int vlen = vector_length(this, $src); 3951 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3952 %} 3953 ins_pipe( pipe_slow ); 3954 %} 3955 3956 // ====================VECTOR ARITHMETIC======================================= 3957 3958 // --------------------------------- ADD -------------------------------------- 3959 3960 // Bytes vector add 3961 instruct vaddB(vec dst, vec src) %{ 3962 predicate(UseAVX == 0); 3963 match(Set dst (AddVB dst src)); 3964 format %{ "paddb $dst,$src\t! add packedB" %} 3965 ins_encode %{ 3966 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3967 %} 3968 ins_pipe( pipe_slow ); 3969 %} 3970 3971 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 3972 predicate(UseAVX > 0); 3973 match(Set dst (AddVB src1 src2)); 3974 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 3975 ins_encode %{ 3976 int vector_len = vector_length_encoding(this); 3977 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3978 %} 3979 ins_pipe( pipe_slow ); 3980 %} 3981 3982 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 3983 predicate(UseAVX > 0); 3984 match(Set dst (AddVB src (LoadVector mem))); 3985 format %{ "vpaddb $dst,$src,$mem\t! 
add packedB" %} 3986 ins_encode %{ 3987 int vector_len = vector_length_encoding(this); 3988 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3989 %} 3990 ins_pipe( pipe_slow ); 3991 %} 3992 3993 // Shorts/Chars vector add 3994 instruct vaddS(vec dst, vec src) %{ 3995 predicate(UseAVX == 0); 3996 match(Set dst (AddVS dst src)); 3997 format %{ "paddw $dst,$src\t! add packedS" %} 3998 ins_encode %{ 3999 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 4000 %} 4001 ins_pipe( pipe_slow ); 4002 %} 4003 4004 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 4005 predicate(UseAVX > 0); 4006 match(Set dst (AddVS src1 src2)); 4007 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 4008 ins_encode %{ 4009 int vector_len = vector_length_encoding(this); 4010 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4011 %} 4012 ins_pipe( pipe_slow ); 4013 %} 4014 4015 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 4016 predicate(UseAVX > 0); 4017 match(Set dst (AddVS src (LoadVector mem))); 4018 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 4019 ins_encode %{ 4020 int vector_len = vector_length_encoding(this); 4021 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4022 %} 4023 ins_pipe( pipe_slow ); 4024 %} 4025 4026 // Integers vector add 4027 instruct vaddI(vec dst, vec src) %{ 4028 predicate(UseAVX == 0); 4029 match(Set dst (AddVI dst src)); 4030 format %{ "paddd $dst,$src\t! add packedI" %} 4031 ins_encode %{ 4032 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 4033 %} 4034 ins_pipe( pipe_slow ); 4035 %} 4036 4037 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 4038 predicate(UseAVX > 0); 4039 match(Set dst (AddVI src1 src2)); 4040 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 4041 ins_encode %{ 4042 int vector_len = vector_length_encoding(this); 4043 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4044 %} 4045 ins_pipe( pipe_slow ); 4046 %} 4047 4048 4049 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 4050 predicate(UseAVX > 0); 4051 match(Set dst (AddVI src (LoadVector mem))); 4052 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 4053 ins_encode %{ 4054 int vector_len = vector_length_encoding(this); 4055 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4056 %} 4057 ins_pipe( pipe_slow ); 4058 %} 4059 4060 // Longs vector add 4061 instruct vaddL(vec dst, vec src) %{ 4062 predicate(UseAVX == 0); 4063 match(Set dst (AddVL dst src)); 4064 format %{ "paddq $dst,$src\t! add packedL" %} 4065 ins_encode %{ 4066 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 4067 %} 4068 ins_pipe( pipe_slow ); 4069 %} 4070 4071 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 4072 predicate(UseAVX > 0); 4073 match(Set dst (AddVL src1 src2)); 4074 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 4075 ins_encode %{ 4076 int vector_len = vector_length_encoding(this); 4077 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4078 %} 4079 ins_pipe( pipe_slow ); 4080 %} 4081 4082 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 4083 predicate(UseAVX > 0); 4084 match(Set dst (AddVL src (LoadVector mem))); 4085 format %{ "vpaddq $dst,$src,$mem\t! 
add packedL" %} 4086 ins_encode %{ 4087 int vector_len = vector_length_encoding(this); 4088 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4089 %} 4090 ins_pipe( pipe_slow ); 4091 %} 4092 4093 // Floats vector add 4094 instruct vaddF(vec dst, vec src) %{ 4095 predicate(UseAVX == 0); 4096 match(Set dst (AddVF dst src)); 4097 format %{ "addps $dst,$src\t! add packedF" %} 4098 ins_encode %{ 4099 __ addps($dst$$XMMRegister, $src$$XMMRegister); 4100 %} 4101 ins_pipe( pipe_slow ); 4102 %} 4103 4104 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 4105 predicate(UseAVX > 0); 4106 match(Set dst (AddVF src1 src2)); 4107 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 4108 ins_encode %{ 4109 int vector_len = vector_length_encoding(this); 4110 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4111 %} 4112 ins_pipe( pipe_slow ); 4113 %} 4114 4115 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 4116 predicate(UseAVX > 0); 4117 match(Set dst (AddVF src (LoadVector mem))); 4118 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 4119 ins_encode %{ 4120 int vector_len = vector_length_encoding(this); 4121 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4122 %} 4123 ins_pipe( pipe_slow ); 4124 %} 4125 4126 // Doubles vector add 4127 instruct vaddD(vec dst, vec src) %{ 4128 predicate(UseAVX == 0); 4129 match(Set dst (AddVD dst src)); 4130 format %{ "addpd $dst,$src\t! add packedD" %} 4131 ins_encode %{ 4132 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 4133 %} 4134 ins_pipe( pipe_slow ); 4135 %} 4136 4137 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 4138 predicate(UseAVX > 0); 4139 match(Set dst (AddVD src1 src2)); 4140 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 4141 ins_encode %{ 4142 int vector_len = vector_length_encoding(this); 4143 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4144 %} 4145 ins_pipe( pipe_slow ); 4146 %} 4147 4148 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 4149 predicate(UseAVX > 0); 4150 match(Set dst (AddVD src (LoadVector mem))); 4151 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 4152 ins_encode %{ 4153 int vector_len = vector_length_encoding(this); 4154 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4155 %} 4156 ins_pipe( pipe_slow ); 4157 %} 4158 4159 // --------------------------------- SUB -------------------------------------- 4160 4161 // Bytes vector sub 4162 instruct vsubB(vec dst, vec src) %{ 4163 predicate(UseAVX == 0); 4164 match(Set dst (SubVB dst src)); 4165 format %{ "psubb $dst,$src\t! sub packedB" %} 4166 ins_encode %{ 4167 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 4168 %} 4169 ins_pipe( pipe_slow ); 4170 %} 4171 4172 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 4173 predicate(UseAVX > 0); 4174 match(Set dst (SubVB src1 src2)); 4175 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 4176 ins_encode %{ 4177 int vector_len = vector_length_encoding(this); 4178 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4179 %} 4180 ins_pipe( pipe_slow ); 4181 %} 4182 4183 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 4184 predicate(UseAVX > 0); 4185 match(Set dst (SubVB src (LoadVector mem))); 4186 format %{ "vpsubb $dst,$src,$mem\t! 
sub packedB" %} 4187 ins_encode %{ 4188 int vector_len = vector_length_encoding(this); 4189 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4190 %} 4191 ins_pipe( pipe_slow ); 4192 %} 4193 4194 // Shorts/Chars vector sub 4195 instruct vsubS(vec dst, vec src) %{ 4196 predicate(UseAVX == 0); 4197 match(Set dst (SubVS dst src)); 4198 format %{ "psubw $dst,$src\t! sub packedS" %} 4199 ins_encode %{ 4200 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 4201 %} 4202 ins_pipe( pipe_slow ); 4203 %} 4204 4205 4206 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 4207 predicate(UseAVX > 0); 4208 match(Set dst (SubVS src1 src2)); 4209 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 4210 ins_encode %{ 4211 int vector_len = vector_length_encoding(this); 4212 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4213 %} 4214 ins_pipe( pipe_slow ); 4215 %} 4216 4217 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 4218 predicate(UseAVX > 0); 4219 match(Set dst (SubVS src (LoadVector mem))); 4220 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 4221 ins_encode %{ 4222 int vector_len = vector_length_encoding(this); 4223 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4224 %} 4225 ins_pipe( pipe_slow ); 4226 %} 4227 4228 // Integers vector sub 4229 instruct vsubI(vec dst, vec src) %{ 4230 predicate(UseAVX == 0); 4231 match(Set dst (SubVI dst src)); 4232 format %{ "psubd $dst,$src\t! sub packedI" %} 4233 ins_encode %{ 4234 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 4235 %} 4236 ins_pipe( pipe_slow ); 4237 %} 4238 4239 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 4240 predicate(UseAVX > 0); 4241 match(Set dst (SubVI src1 src2)); 4242 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 4243 ins_encode %{ 4244 int vector_len = vector_length_encoding(this); 4245 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4246 %} 4247 ins_pipe( pipe_slow ); 4248 %} 4249 4250 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 4251 predicate(UseAVX > 0); 4252 match(Set dst (SubVI src (LoadVector mem))); 4253 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 4254 ins_encode %{ 4255 int vector_len = vector_length_encoding(this); 4256 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4257 %} 4258 ins_pipe( pipe_slow ); 4259 %} 4260 4261 // Longs vector sub 4262 instruct vsubL(vec dst, vec src) %{ 4263 predicate(UseAVX == 0); 4264 match(Set dst (SubVL dst src)); 4265 format %{ "psubq $dst,$src\t! sub packedL" %} 4266 ins_encode %{ 4267 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 4268 %} 4269 ins_pipe( pipe_slow ); 4270 %} 4271 4272 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 4273 predicate(UseAVX > 0); 4274 match(Set dst (SubVL src1 src2)); 4275 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 4276 ins_encode %{ 4277 int vector_len = vector_length_encoding(this); 4278 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4279 %} 4280 ins_pipe( pipe_slow ); 4281 %} 4282 4283 4284 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 4285 predicate(UseAVX > 0); 4286 match(Set dst (SubVL src (LoadVector mem))); 4287 format %{ "vpsubq $dst,$src,$mem\t! 
sub packedL" %} 4288 ins_encode %{ 4289 int vector_len = vector_length_encoding(this); 4290 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4291 %} 4292 ins_pipe( pipe_slow ); 4293 %} 4294 4295 // Floats vector sub 4296 instruct vsubF(vec dst, vec src) %{ 4297 predicate(UseAVX == 0); 4298 match(Set dst (SubVF dst src)); 4299 format %{ "subps $dst,$src\t! sub packedF" %} 4300 ins_encode %{ 4301 __ subps($dst$$XMMRegister, $src$$XMMRegister); 4302 %} 4303 ins_pipe( pipe_slow ); 4304 %} 4305 4306 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 4307 predicate(UseAVX > 0); 4308 match(Set dst (SubVF src1 src2)); 4309 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 4310 ins_encode %{ 4311 int vector_len = vector_length_encoding(this); 4312 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4313 %} 4314 ins_pipe( pipe_slow ); 4315 %} 4316 4317 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 4318 predicate(UseAVX > 0); 4319 match(Set dst (SubVF src (LoadVector mem))); 4320 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 4321 ins_encode %{ 4322 int vector_len = vector_length_encoding(this); 4323 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4324 %} 4325 ins_pipe( pipe_slow ); 4326 %} 4327 4328 // Doubles vector sub 4329 instruct vsubD(vec dst, vec src) %{ 4330 predicate(UseAVX == 0); 4331 match(Set dst (SubVD dst src)); 4332 format %{ "subpd $dst,$src\t! sub packedD" %} 4333 ins_encode %{ 4334 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 4335 %} 4336 ins_pipe( pipe_slow ); 4337 %} 4338 4339 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 4340 predicate(UseAVX > 0); 4341 match(Set dst (SubVD src1 src2)); 4342 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 4343 ins_encode %{ 4344 int vector_len = vector_length_encoding(this); 4345 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4346 %} 4347 ins_pipe( pipe_slow ); 4348 %} 4349 4350 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 4351 predicate(UseAVX > 0); 4352 match(Set dst (SubVD src (LoadVector mem))); 4353 format %{ "vsubpd $dst,$src,$mem\t! 
sub packedD" %} 4354 ins_encode %{ 4355 int vector_len = vector_length_encoding(this); 4356 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4357 %} 4358 ins_pipe( pipe_slow ); 4359 %} 4360 4361 // --------------------------------- MUL -------------------------------------- 4362 4363 // Byte vector mul 4364 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 4365 predicate(n->as_Vector()->length() == 4 || 4366 n->as_Vector()->length() == 8); 4367 match(Set dst (MulVB src1 src2)); 4368 effect(TEMP dst, TEMP tmp, TEMP scratch); 4369 format %{"vector_mulB $dst,$src1,$src2" %} 4370 ins_encode %{ 4371 assert(UseSSE > 3, "required"); 4372 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 4373 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 4374 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 4375 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4376 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 4377 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 4378 %} 4379 ins_pipe( pipe_slow ); 4380 %} 4381 4382 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4383 predicate(n->as_Vector()->length() == 16 && UseAVX <= 1); 4384 match(Set dst (MulVB src1 src2)); 4385 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4386 format %{"vector_mulB $dst,$src1,$src2" %} 4387 ins_encode %{ 4388 assert(UseSSE > 3, "required"); 4389 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 4390 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 4391 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 4392 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 4393 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 4394 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4395 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 4396 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 4397 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4398 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 4399 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 4400 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 4401 %} 4402 ins_pipe( pipe_slow ); 4403 %} 4404 4405 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 4406 predicate(n->as_Vector()->length() == 16 && UseAVX > 1); 4407 match(Set dst (MulVB src1 src2)); 4408 effect(TEMP dst, TEMP tmp, TEMP scratch); 4409 format %{"vector_mulB $dst,$src1,$src2" %} 4410 ins_encode %{ 4411 int vector_len = Assembler::AVX_256bit; 4412 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 4413 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4414 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); 4415 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4416 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 4417 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 4418 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 4419 %} 4420 ins_pipe( pipe_slow ); 4421 %} 4422 4423 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4424 predicate(n->as_Vector()->length() == 32); 4425 match(Set dst (MulVB src1 src2)); 4426 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4427 format %{"vector_mulB $dst,$src1,$src2" %} 4428 ins_encode %{ 4429 assert(UseAVX > 1, "required"); 4430 int 
vector_len = Assembler::AVX_256bit; 4431 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 4432 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 4433 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4434 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4435 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4436 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 4437 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4438 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4439 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4440 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4441 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4442 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4443 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4444 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 4445 %} 4446 ins_pipe( pipe_slow ); 4447 %} 4448 4449 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4450 predicate(n->as_Vector()->length() == 64); 4451 match(Set dst (MulVB src1 src2)); 4452 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4453 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 4454 ins_encode %{ 4455 assert(UseAVX > 2, "required"); 4456 int vector_len = Assembler::AVX_512bit; 4457 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 4458 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 4459 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4460 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4461 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4462 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 4463 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4464 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4465 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4466 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4467 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4468 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4469 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4470 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 4471 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4472 %} 4473 ins_pipe( pipe_slow ); 4474 %} 4475 4476 // Shorts/Chars vector mul 4477 instruct vmulS(vec dst, vec src) %{ 4478 predicate(UseAVX == 0); 4479 match(Set dst (MulVS dst src)); 4480 format %{ "pmullw $dst,$src\t! mul packedS" %} 4481 ins_encode %{ 4482 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4483 %} 4484 ins_pipe( pipe_slow ); 4485 %} 4486 4487 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 4488 predicate(UseAVX > 0); 4489 match(Set dst (MulVS src1 src2)); 4490 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packedS" %} 4491 ins_encode %{ 4492 int vector_len = vector_length_encoding(this); 4493 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4494 %} 4495 ins_pipe( pipe_slow ); 4496 %} 4497 4498 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 4499 predicate(UseAVX > 0); 4500 match(Set dst (MulVS src (LoadVector mem))); 4501 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 4502 ins_encode %{ 4503 int vector_len = vector_length_encoding(this); 4504 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4505 %} 4506 ins_pipe( pipe_slow ); 4507 %} 4508 4509 // Integers vector mul 4510 instruct vmulI(vec dst, vec src) %{ 4511 predicate(UseAVX == 0); 4512 match(Set dst (MulVI dst src)); 4513 format %{ "pmulld $dst,$src\t! mul packedI" %} 4514 ins_encode %{ 4515 assert(UseSSE > 3, "required"); 4516 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 4517 %} 4518 ins_pipe( pipe_slow ); 4519 %} 4520 4521 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 4522 predicate(UseAVX > 0); 4523 match(Set dst (MulVI src1 src2)); 4524 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 4525 ins_encode %{ 4526 int vector_len = vector_length_encoding(this); 4527 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4528 %} 4529 ins_pipe( pipe_slow ); 4530 %} 4531 4532 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 4533 predicate(UseAVX > 0); 4534 match(Set dst (MulVI src (LoadVector mem))); 4535 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 4536 ins_encode %{ 4537 int vector_len = vector_length_encoding(this); 4538 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4539 %} 4540 ins_pipe( pipe_slow ); 4541 %} 4542 4543 // Longs vector mul 4544 instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 4545 match(Set dst (MulVL src1 src2)); 4546 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 4547 ins_encode %{ 4548 assert(UseAVX > 2, "required"); 4549 int vector_len = vector_length_encoding(this); 4550 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4551 %} 4552 ins_pipe( pipe_slow ); 4553 %} 4554 4555 instruct vmulL_mem(vec dst, vec src, memory mem) %{ 4556 match(Set dst (MulVL src (LoadVector mem))); 4557 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 4558 ins_encode %{ 4559 assert(UseAVX > 2, "required"); 4560 int vector_len = vector_length_encoding(this); 4561 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4562 %} 4563 ins_pipe( pipe_slow ); 4564 %} 4565 4566 // Floats vector mul 4567 instruct vmulF(vec dst, vec src) %{ 4568 predicate(UseAVX == 0); 4569 match(Set dst (MulVF dst src)); 4570 format %{ "mulps $dst,$src\t! mul packedF" %} 4571 ins_encode %{ 4572 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 4573 %} 4574 ins_pipe( pipe_slow ); 4575 %} 4576 4577 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 4578 predicate(UseAVX > 0); 4579 match(Set dst (MulVF src1 src2)); 4580 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 4581 ins_encode %{ 4582 int vector_len = vector_length_encoding(this); 4583 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4584 %} 4585 ins_pipe( pipe_slow ); 4586 %} 4587 4588 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 4589 predicate(UseAVX > 0); 4590 match(Set dst (MulVF src (LoadVector mem))); 4591 format %{ "vmulps $dst,$src,$mem\t! 
mul packedF" %} 4592 ins_encode %{ 4593 int vector_len = vector_length_encoding(this); 4594 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4595 %} 4596 ins_pipe( pipe_slow ); 4597 %} 4598 4599 // Doubles vector mul 4600 instruct vmulD(vec dst, vec src) %{ 4601 predicate(UseAVX == 0); 4602 match(Set dst (MulVD dst src)); 4603 format %{ "mulpd $dst,$src\t! mul packedD" %} 4604 ins_encode %{ 4605 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 4606 %} 4607 ins_pipe( pipe_slow ); 4608 %} 4609 4610 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 4611 predicate(UseAVX > 0); 4612 match(Set dst (MulVD src1 src2)); 4613 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 4614 ins_encode %{ 4615 int vector_len = vector_length_encoding(this); 4616 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4617 %} 4618 ins_pipe( pipe_slow ); 4619 %} 4620 4621 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 4622 predicate(UseAVX > 0); 4623 match(Set dst (MulVD src (LoadVector mem))); 4624 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 4625 ins_encode %{ 4626 int vector_len = vector_length_encoding(this); 4627 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4628 %} 4629 ins_pipe( pipe_slow ); 4630 %} 4631 4632 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 4633 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4634 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 4635 effect(TEMP dst, USE src1, USE src2); 4636 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 4637 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 4638 %} 4639 ins_encode %{ 4640 int vector_len = 1; 4641 int cond = (Assembler::Condition)($copnd$$cmpcode); 4642 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 4643 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 4644 %} 4645 ins_pipe( pipe_slow ); 4646 %} 4647 4648 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 4649 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4650 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 4651 effect(TEMP dst, USE src1, USE src2); 4652 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 4653 "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 4654 %} 4655 ins_encode %{ 4656 int vector_len = 1; 4657 int cond = (Assembler::Condition)($copnd$$cmpcode); 4658 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 4659 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 4660 %} 4661 ins_pipe( pipe_slow ); 4662 %} 4663 4664 // --------------------------------- DIV -------------------------------------- 4665 4666 // Floats vector div 4667 instruct vdivF(vec dst, vec src) %{ 4668 predicate(UseAVX == 0); 4669 match(Set dst (DivVF dst src)); 4670 format %{ "divps $dst,$src\t! div packedF" %} 4671 ins_encode %{ 4672 __ divps($dst$$XMMRegister, $src$$XMMRegister); 4673 %} 4674 ins_pipe( pipe_slow ); 4675 %} 4676 4677 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 4678 predicate(UseAVX > 0); 4679 match(Set dst (DivVF src1 src2)); 4680 format %{ "vdivps $dst,$src1,$src2\t! 
div packedF" %} 4681 ins_encode %{ 4682 int vector_len = vector_length_encoding(this); 4683 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4684 %} 4685 ins_pipe( pipe_slow ); 4686 %} 4687 4688 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 4689 predicate(UseAVX > 0); 4690 match(Set dst (DivVF src (LoadVector mem))); 4691 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 4692 ins_encode %{ 4693 int vector_len = vector_length_encoding(this); 4694 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4695 %} 4696 ins_pipe( pipe_slow ); 4697 %} 4698 4699 // Doubles vector div 4700 instruct vdivD(vec dst, vec src) %{ 4701 predicate(UseAVX == 0); 4702 match(Set dst (DivVD dst src)); 4703 format %{ "divpd $dst,$src\t! div packedD" %} 4704 ins_encode %{ 4705 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 4706 %} 4707 ins_pipe( pipe_slow ); 4708 %} 4709 4710 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 4711 predicate(UseAVX > 0); 4712 match(Set dst (DivVD src1 src2)); 4713 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 4714 ins_encode %{ 4715 int vector_len = vector_length_encoding(this); 4716 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4717 %} 4718 ins_pipe( pipe_slow ); 4719 %} 4720 4721 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 4722 predicate(UseAVX > 0); 4723 match(Set dst (DivVD src (LoadVector mem))); 4724 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 4725 ins_encode %{ 4726 int vector_len = vector_length_encoding(this); 4727 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4728 %} 4729 ins_pipe( pipe_slow ); 4730 %} 4731 4732 // --------------------------------- Sqrt -------------------------------------- 4733 4734 instruct vsqrtF_reg(vec dst, vec src) %{ 4735 match(Set dst (SqrtVF src)); 4736 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 4737 ins_encode %{ 4738 assert(UseAVX > 0, "required"); 4739 int vector_len = vector_length_encoding(this); 4740 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4741 %} 4742 ins_pipe( pipe_slow ); 4743 %} 4744 4745 instruct vsqrtF_mem(vec dst, memory mem) %{ 4746 match(Set dst (SqrtVF (LoadVector mem))); 4747 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 4748 ins_encode %{ 4749 assert(UseAVX > 0, "required"); 4750 int vector_len = vector_length_encoding(this); 4751 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 4752 %} 4753 ins_pipe( pipe_slow ); 4754 %} 4755 4756 // Floating point vector sqrt 4757 instruct vsqrtD_reg(vec dst, vec src) %{ 4758 match(Set dst (SqrtVD src)); 4759 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 4760 ins_encode %{ 4761 assert(UseAVX > 0, "required"); 4762 int vector_len = vector_length_encoding(this); 4763 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4764 %} 4765 ins_pipe( pipe_slow ); 4766 %} 4767 4768 instruct vsqrtD_mem(vec dst, memory mem) %{ 4769 match(Set dst (SqrtVD (LoadVector mem))); 4770 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 4771 ins_encode %{ 4772 assert(UseAVX > 0, "required"); 4773 int vector_len = vector_length_encoding(this); 4774 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 4775 %} 4776 ins_pipe( pipe_slow ); 4777 %} 4778 4779 // ------------------------------ Shift --------------------------------------- 4780 4781 // Left and right shift count vectors are the same on x86 4782 // (only lowest bits of xmm reg are used for count). 
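// A rough per-lane model of how that count is consumed (illustrative sketch,
// not code from this file; it assumes the count has already been reduced to
// the element width, as Java shift semantics require):
//
//   int cnt = shift_count;          // loaded once into the low bits of an xmm register
//   for (int i = 0; i < num_lanes; i++) {
//     dst[i] = src[i] << cnt;       // every lane is shifted by the same count
//   }
//
// Because left and right shifts take the count from the same register, the
// single vshiftcnt instruct below serves both LShiftCntV and RShiftCntV.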
4783 instruct vshiftcnt(vec dst, rRegI cnt) %{ 4784 match(Set dst (LShiftCntV cnt)); 4785 match(Set dst (RShiftCntV cnt)); 4786 format %{ "movdl $dst,$cnt\t! load shift count" %} 4787 ins_encode %{ 4788 __ movdl($dst$$XMMRegister, $cnt$$Register); 4789 %} 4790 ins_pipe( pipe_slow ); 4791 %} 4792 4793 // Byte vector shift 4794 instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 4795 predicate(n->as_Vector()->length() <= 8); 4796 match(Set dst (LShiftVB src shift)); 4797 match(Set dst (RShiftVB src shift)); 4798 match(Set dst (URShiftVB src shift)); 4799 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 4800 format %{"vector_byte_shift $dst,$src,$shift" %} 4801 ins_encode %{ 4802 assert(UseSSE > 3, "required"); 4803 int opcode = this->ideal_Opcode(); 4804 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister); 4805 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 4806 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4807 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 4808 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 4809 %} 4810 ins_pipe( pipe_slow ); 4811 %} 4812 4813 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 4814 predicate(n->as_Vector()->length() == 16 && UseAVX <= 1); 4815 match(Set dst (LShiftVB src shift)); 4816 match(Set dst (RShiftVB src shift)); 4817 match(Set dst (URShiftVB src shift)); 4818 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 4819 format %{"vector_byte_shift $dst,$src,$shift" %} 4820 ins_encode %{ 4821 assert(UseSSE > 3, "required"); 4822 int opcode = this->ideal_Opcode(); 4823 4824 __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister); 4825 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 4826 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 4827 __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 4828 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 4829 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4830 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 4831 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 4832 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 4833 %} 4834 ins_pipe( pipe_slow ); 4835 %} 4836 4837 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 4838 predicate(n->as_Vector()->length() == 16 && UseAVX > 1); 4839 match(Set dst (LShiftVB src shift)); 4840 match(Set dst (RShiftVB src shift)); 4841 match(Set dst (URShiftVB src shift)); 4842 effect(TEMP dst, TEMP tmp, TEMP scratch); 4843 format %{"vector_byte_shift $dst,$src,$shift" %} 4844 ins_encode %{ 4845 int opcode = this->ideal_Opcode(); 4846 int vector_len = Assembler::AVX_256bit; 4847 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 4848 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 4849 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 4850 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 4851 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 4852 %} 4853 ins_pipe( pipe_slow ); 4854 %} 4855 4856 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 4857 predicate(n->as_Vector()->length() == 32); 4858 match(Set dst (LShiftVB src shift)); 4859 match(Set dst (RShiftVB src shift)); 4860 match(Set dst (URShiftVB 
src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts short values to int with sign
// extension before a shift. Char vectors are fine, since chars are unsigned
// values.
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t!
shift packedS" %} 4918 ins_encode %{ 4919 int opcode = this->ideal_Opcode(); 4920 if (UseAVX > 0) { 4921 int vlen_enc = vector_length_encoding(this); 4922 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 4923 } else { 4924 int vlen = vector_length(this); 4925 if (vlen == 2) { 4926 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 4927 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4928 } else if (vlen == 4) { 4929 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 4930 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4931 } else { 4932 assert (vlen == 8, "sanity"); 4933 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 4934 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4935 } 4936 } 4937 %} 4938 ins_pipe( pipe_slow ); 4939 %} 4940 4941 // Integers vector left shift 4942 instruct vshiftI(vec dst, vec src, vec shift) %{ 4943 match(Set dst (LShiftVI src shift)); 4944 match(Set dst (RShiftVI src shift)); 4945 match(Set dst (URShiftVI src shift)); 4946 effect(TEMP dst, USE src, USE shift); 4947 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 4948 ins_encode %{ 4949 int opcode = this->ideal_Opcode(); 4950 if (UseAVX > 0) { 4951 int vector_len = vector_length_encoding(this); 4952 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 4953 } else { 4954 int vlen = vector_length(this); 4955 if (vlen == 2) { 4956 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 4957 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4958 } else { 4959 assert(vlen == 4, "sanity"); 4960 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 4961 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 4962 } 4963 } 4964 %} 4965 ins_pipe( pipe_slow ); 4966 %} 4967 4968 // Integers vector left constant shift 4969 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 4970 match(Set dst (LShiftVI src (LShiftCntV shift))); 4971 match(Set dst (RShiftVI src (RShiftCntV shift))); 4972 match(Set dst (URShiftVI src (RShiftCntV shift))); 4973 effect(TEMP dst, USE src); 4974 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 4975 ins_encode %{ 4976 int opcode = this->ideal_Opcode(); 4977 if (UseAVX > 0) { 4978 int vector_len = vector_length_encoding(this); 4979 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 4980 } else { 4981 int vlen = vector_length(this); 4982 if (vlen == 2) { 4983 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 4984 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 4985 } else { 4986 assert(vlen == 4, "sanity"); 4987 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 4988 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 4989 } 4990 } 4991 %} 4992 ins_pipe( pipe_slow ); 4993 %} 4994 4995 // Longs vector shift 4996 instruct vshiftL(vec dst, vec src, vec shift) %{ 4997 match(Set dst (LShiftVL src shift)); 4998 match(Set dst (URShiftVL src shift)); 4999 effect(TEMP dst, USE src, USE shift); 5000 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 5001 ins_encode %{ 5002 int opcode = this->ideal_Opcode(); 5003 if (UseAVX > 0) { 5004 int vector_len = vector_length_encoding(this); 5005 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 5006 } else { 5007 assert(vector_length(this) == 2, ""); 5008 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5009 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 5010 } 5011 %} 5012 ins_pipe( pipe_slow ); 5013 %} 5014 5015 // Longs vector constant shift 5016 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 5017 match(Set dst (LShiftVL src (LShiftCntV shift))); 5018 match(Set dst (URShiftVL src (RShiftCntV shift))); 5019 effect(TEMP dst, USE src, USE shift); 5020 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 5021 ins_encode %{ 5022 int opcode = this->ideal_Opcode(); 5023 if (UseAVX > 0) { 5024 int vector_len = vector_length_encoding(this); 5025 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 5026 } else { 5027 assert(vector_length(this) == 2, ""); 5028 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5029 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 5030 } 5031 %} 5032 ins_pipe( pipe_slow ); 5033 %} 5034 5035 // -------------------ArithmeticRightShift ----------------------------------- 5036 // Long vector arithmetic right shift 5037 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5038 predicate(UseAVX <= 2); 5039 match(Set dst (RShiftVL src shift)); 5040 effect(TEMP dst, TEMP tmp, TEMP scratch); 5041 format %{ "vshiftq $dst,$src,$shift" %} 5042 ins_encode %{ 5043 uint vlen = vector_length(this); 5044 if (vlen == 2) { 5045 assert(UseSSE >= 2, "required"); 5046 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5047 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 5048 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 5049 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 5050 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 5051 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 5052 } else { 5053 assert(vlen == 4, "sanity"); 5054 assert(UseAVX > 1, "required"); 5055 int vector_len = Assembler::AVX_256bit; 5056 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 5057 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 5058 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 5059 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 5060 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 5061 } 5062 %} 5063 ins_pipe( pipe_slow ); 5064 %} 5065 5066 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 5067 predicate(UseAVX > 2); 5068 match(Set dst (RShiftVL src shift)); 5069 format %{ "vshiftq $dst,$src,$shift" %} 5070 ins_encode %{ 5071 int vector_len = vector_length_encoding(this); 5072 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 5073 %} 5074 ins_pipe( pipe_slow ); 5075 %} 5076 5077 // --------------------------------- AND -------------------------------------- 5078 5079 instruct vand(vec dst, vec src) %{ 5080 predicate(UseAVX == 0); 5081 match(Set dst (AndV dst src)); 5082 format %{ "pand $dst,$src\t! 
and vectors" %} 5083 ins_encode %{ 5084 __ pand($dst$$XMMRegister, $src$$XMMRegister); 5085 %} 5086 ins_pipe( pipe_slow ); 5087 %} 5088 5089 instruct vand_reg(vec dst, vec src1, vec src2) %{ 5090 predicate(UseAVX > 0); 5091 match(Set dst (AndV src1 src2)); 5092 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 5093 ins_encode %{ 5094 int vector_len = vector_length_encoding(this); 5095 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5096 %} 5097 ins_pipe( pipe_slow ); 5098 %} 5099 5100 instruct vand_mem(vec dst, vec src, memory mem) %{ 5101 predicate(UseAVX > 0); 5102 match(Set dst (AndV src (LoadVector mem))); 5103 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 5104 ins_encode %{ 5105 int vector_len = vector_length_encoding(this); 5106 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5107 %} 5108 ins_pipe( pipe_slow ); 5109 %} 5110 5111 // --------------------------------- OR --------------------------------------- 5112 5113 instruct vor(vec dst, vec src) %{ 5114 predicate(UseAVX == 0); 5115 match(Set dst (OrV dst src)); 5116 format %{ "por $dst,$src\t! or vectors" %} 5117 ins_encode %{ 5118 __ por($dst$$XMMRegister, $src$$XMMRegister); 5119 %} 5120 ins_pipe( pipe_slow ); 5121 %} 5122 5123 instruct vor_reg(vec dst, vec src1, vec src2) %{ 5124 predicate(UseAVX > 0); 5125 match(Set dst (OrV src1 src2)); 5126 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 5127 ins_encode %{ 5128 int vector_len = vector_length_encoding(this); 5129 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5130 %} 5131 ins_pipe( pipe_slow ); 5132 %} 5133 5134 instruct vor_mem(vec dst, vec src, memory mem) %{ 5135 predicate(UseAVX > 0); 5136 match(Set dst (OrV src (LoadVector mem))); 5137 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 5138 ins_encode %{ 5139 int vector_len = vector_length_encoding(this); 5140 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5141 %} 5142 ins_pipe( pipe_slow ); 5143 %} 5144 5145 // --------------------------------- XOR -------------------------------------- 5146 5147 instruct vxor(vec dst, vec src) %{ 5148 predicate(UseAVX == 0); 5149 match(Set dst (XorV dst src)); 5150 format %{ "pxor $dst,$src\t! xor vectors" %} 5151 ins_encode %{ 5152 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5153 %} 5154 ins_pipe( pipe_slow ); 5155 %} 5156 5157 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 5158 predicate(UseAVX > 0); 5159 match(Set dst (XorV src1 src2)); 5160 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 5161 ins_encode %{ 5162 int vector_len = vector_length_encoding(this); 5163 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5164 %} 5165 ins_pipe( pipe_slow ); 5166 %} 5167 5168 instruct vxor_mem(vec dst, vec src, memory mem) %{ 5169 predicate(UseAVX > 0); 5170 match(Set dst (XorV src (LoadVector mem))); 5171 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors" %} 5172 ins_encode %{ 5173 int vector_len = vector_length_encoding(this); 5174 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5175 %} 5176 ins_pipe( pipe_slow ); 5177 %} 5178 5179 // --------------------------------- ABS -------------------------------------- 5180 // a = |a| 5181 instruct vabsB_reg(vec dst, vec src) %{ 5182 match(Set dst (AbsVB src)); 5183 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 5184 ins_encode %{ 5185 uint vlen = vector_length(this); 5186 if (vlen <= 16) { 5187 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 5188 } else { 5189 int vlen_enc = vector_length_encoding(this); 5190 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5191 } 5192 %} 5193 ins_pipe( pipe_slow ); 5194 %} 5195 5196 instruct vabsS_reg(vec dst, vec src) %{ 5197 match(Set dst (AbsVS src)); 5198 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 5199 ins_encode %{ 5200 uint vlen = vector_length(this); 5201 if (vlen <= 8) { 5202 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 5203 } else { 5204 int vlen_enc = vector_length_encoding(this); 5205 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5206 } 5207 %} 5208 ins_pipe( pipe_slow ); 5209 %} 5210 5211 instruct vabsI_reg(vec dst, vec src) %{ 5212 match(Set dst (AbsVI src)); 5213 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 5214 ins_encode %{ 5215 uint vlen = vector_length(this); 5216 if (vlen <= 4) { 5217 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 5218 } else { 5219 int vlen_enc = vector_length_encoding(this); 5220 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5221 } 5222 %} 5223 ins_pipe( pipe_slow ); 5224 %} 5225 5226 instruct vabsL_reg(vec dst, vec src) %{ 5227 match(Set dst (AbsVL src)); 5228 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 5229 ins_encode %{ 5230 assert(UseAVX > 2, "required"); 5231 int vector_len = vector_length_encoding(this); 5232 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 5233 %} 5234 ins_pipe( pipe_slow ); 5235 %} 5236 5237 // --------------------------------- ABSNEG -------------------------------------- 5238 5239 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 5240 predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F 5241 match(Set dst (AbsVF src)); 5242 match(Set dst (NegVF src)); 5243 effect(TEMP scratch); 5244 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 5245 ins_cost(150); 5246 ins_encode %{ 5247 int opcode = this->ideal_Opcode(); 5248 int vlen = vector_length(this); 5249 if (vlen == 2) { 5250 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 5251 } else { 5252 assert(vlen == 8 || vlen == 16, "required"); 5253 int vlen_enc = vector_length_encoding(this); 5254 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 5255 } 5256 %} 5257 ins_pipe( pipe_slow ); 5258 %} 5259 5260 instruct vabsneg4F(vec dst, rRegI scratch) %{ 5261 predicate(n->as_Vector()->length() == 4); 5262 match(Set dst (AbsVF dst)); 5263 match(Set dst (NegVF dst)); 5264 effect(TEMP scratch); 5265 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 5266 ins_cost(150); 5267 ins_encode %{ 5268 int opcode = this->ideal_Opcode(); 5269 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 5270 %} 5271 ins_pipe( pipe_slow ); 5272 %} 5273 5274 instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 5275 match(Set dst (AbsVD src)); 5276 match(Set dst (NegVD src)); 5277 
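  // AbsVD and NegVD are both handled by the vabsnegd macro below with a
  // sign-bit mask constant (the [mask] in the format string), loaded through
  // $scratch. A rough per-lane model (illustrative sketch, not code from this
  // file; bitsOf/toDouble are hypothetical helpers):
  //
  //   uint64_t bits = bitsOf(src[i]);
  //   abs: dst[i] = toDouble(bits & 0x7FFFFFFFFFFFFFFFull);   // clear the sign bit
  //   neg: dst[i] = toDouble(bits ^ 0x8000000000000000ull);   // flip the sign bit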
effect(TEMP scratch); 5278 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 5279 ins_encode %{ 5280 int opcode = this->ideal_Opcode(); 5281 uint vlen = vector_length(this); 5282 if (vlen == 2) { 5283 assert(UseSSE >= 2, "required"); 5284 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 5285 } else { 5286 int vlen_enc = vector_length_encoding(this); 5287 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 5288 } 5289 %} 5290 ins_pipe( pipe_slow ); 5291 %} 5292 5293 // --------------------------------- FMA -------------------------------------- 5294 // a * b + c 5295 5296 instruct vfmaF_reg(vec a, vec b, vec c) %{ 5297 match(Set c (FmaVF c (Binary a b))); 5298 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 5299 ins_cost(150); 5300 ins_encode %{ 5301 assert(UseFMA, "not enabled"); 5302 int vector_len = vector_length_encoding(this); 5303 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 5304 %} 5305 ins_pipe( pipe_slow ); 5306 %} 5307 5308 instruct vfmaF_mem(vec a, memory b, vec c) %{ 5309 match(Set c (FmaVF c (Binary a (LoadVector b)))); 5310 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 5311 ins_cost(150); 5312 ins_encode %{ 5313 assert(UseFMA, "not enabled"); 5314 int vector_len = vector_length_encoding(this); 5315 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 5316 %} 5317 ins_pipe( pipe_slow ); 5318 %} 5319 5320 instruct vfmaD_reg(vec a, vec b, vec c) %{ 5321 match(Set c (FmaVD c (Binary a b))); 5322 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 5323 ins_cost(150); 5324 ins_encode %{ 5325 assert(UseFMA, "not enabled"); 5326 int vector_len = vector_length_encoding(this); 5327 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 5328 %} 5329 ins_pipe( pipe_slow ); 5330 %} 5331 5332 instruct vfmaD_mem(vec a, memory b, vec c) %{ 5333 match(Set c (FmaVD c (Binary a (LoadVector b)))); 5334 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 5335 ins_cost(150); 5336 ins_encode %{ 5337 assert(UseFMA, "not enabled"); 5338 int vector_len = vector_length_encoding(this); 5339 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 5340 %} 5341 ins_pipe( pipe_slow ); 5342 %} 5343 5344 // --------------------------------- Vector Multiply Add -------------------------------------- 5345 5346 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 5347 predicate(UseAVX == 0); 5348 match(Set dst (MulAddVS2VI dst src1)); 5349 format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %} 5350 ins_encode %{ 5351 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 5352 %} 5353 ins_pipe( pipe_slow ); 5354 %} 5355 5356 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 5357 predicate(UseAVX > 0); 5358 match(Set dst (MulAddVS2VI src1 src2)); 5359 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 5360 ins_encode %{ 5361 int vector_len = vector_length_encoding(this); 5362 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5363 %} 5364 ins_pipe( pipe_slow ); 5365 %} 5366 5367 // --------------------------------- Vector Multiply Add Add ---------------------------------- 5368 5369 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 5370 predicate(VM_Version::supports_avx512_vnni()); 5371 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 5372 format %{ "evpdpwssd $dst,$src1,$src2\t! 
muladdadd packedStoI" %} 5373 ins_encode %{ 5374 assert(UseAVX > 2, "required"); 5375 int vector_len = vector_length_encoding(this); 5376 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5377 %} 5378 ins_pipe( pipe_slow ); 5379 ins_cost(10); 5380 %} 5381 5382 // --------------------------------- PopCount -------------------------------------- 5383 5384 instruct vpopcountI(vec dst, vec src) %{ 5385 match(Set dst (PopCountVI src)); 5386 format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %} 5387 ins_encode %{ 5388 assert(UsePopCountInstruction, "not enabled"); 5389 5390 int vector_len = vector_length_encoding(this); 5391 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 5392 %} 5393 ins_pipe( pipe_slow ); 5394 %} 5395 5396 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 5397 5398 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 5399 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 5400 effect(TEMP dst); 5401 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 5402 ins_encode %{ 5403 int vector_len = vector_length_encoding(this); 5404 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 5405 %} 5406 ins_pipe( pipe_slow ); 5407 %} 5408 5409 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 5410 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 5411 effect(TEMP dst); 5412 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 5413 ins_encode %{ 5414 int vector_len = vector_length_encoding(this); 5415 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 5416 %} 5417 ins_pipe( pipe_slow ); 5418 %} 5419 5420 // --------------------------------- Rotation Operations ---------------------------------- 5421 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 5422 match(Set dst (RotateLeftV src shift)); 5423 match(Set dst (RotateRightV src shift)); 5424 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 5425 ins_encode %{ 5426 int opcode = this->ideal_Opcode(); 5427 int vector_len = vector_length_encoding(this); 5428 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 5429 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 5430 %} 5431 ins_pipe( pipe_slow ); 5432 %} 5433 5434 instruct vprorate(vec dst, vec src, vec shift) %{ 5435 match(Set dst (RotateLeftV src shift)); 5436 match(Set dst (RotateRightV src shift)); 5437 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 5438 ins_encode %{ 5439 int opcode = this->ideal_Opcode(); 5440 int vector_len = vector_length_encoding(this); 5441 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 5442 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 5443 %} 5444 ins_pipe( pipe_slow ); 5445 %} 5446
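// A rough per-lane model of the rotation instructs above (illustrative sketch,
// not code from this file), shown for 32-bit lanes and a count s with 0 < s < 32:
//
//   uint32_t rotl(uint32_t x, int s) { return (x << s) | (x >> (32 - s)); }
//   uint32_t rotr(uint32_t x, int s) { return (x >> s) | (x << (32 - s)); }
//
// AVX-512 offers this directly (e.g. vprold/vprolvd); where no single rotate
// instruction exists, the same result can be composed from two shifts and an
// OR, as above.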