//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name (register save type, C convention save type,
//                  ideal register type, encoding);
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
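//
// For example, the first definition below,
//     reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// reads as: the low 32-bit word of xmm0 is save-on-call both for the
// register allocator and for the C calling convention, is spilled as a
// float (Op_RegF), and carries encoding 0 in the opcodes.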

// XMM registers. 512-bit registers, 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
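
// As described in the header comment above, a Float lives in word (a) of a
// register, a Double in words (a)-(b), and a full 512-bit vector in all
// sixteen words (a)-(p), which is why every word of every XMM register
// appears in the allocation chunk above.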

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
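
// Note: each reg_class_dynamic above and below selects between its two
// constituent classes when code is generated: the first (EVEX) class is
// used when the %{ ... %} predicate holds, the second (legacy) class
// otherwise.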

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre-EVEX 32-bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for EVEX 32-bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 64-bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for EVEX 64-bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 128-bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for EVEX 128-bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 256-bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for EVEX 256-bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for EVEX 512-bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 %} 1101 1102 1103 //----------SOURCE BLOCK------------------------------------------------------- 1104 // This is a block of C++ code which provides values, functions, and 1105 // definitions necessary in the rest of the architecture description 1106 1107 source_hpp %{ 1108 // Header information of the source block. 1109 // Method declarations/definitions which are used outside 1110 // the ad-scope can conveniently be defined here. 1111 // 1112 // To keep related declarations/definitions/uses close together, 1113 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
1114 1115 class NativeJump; 1116 1117 class CallStubImpl { 1118 1119 //-------------------------------------------------------------- 1120 //---< Used for optimization in Compile::shorten_branches >--- 1121 //-------------------------------------------------------------- 1122 1123 public: 1124 // Size of call trampoline stub. 1125 static uint size_call_trampoline() { 1126 return 0; // no call trampolines on this platform 1127 } 1128 1129 // Number of relocations needed by a call trampoline stub. 1130 static uint reloc_call_trampoline() { 1131 return 0; // no call trampolines on this platform 1132 } 1133 }; 1134 1135 class HandlerImpl { 1136 1137 public: 1138 1139 static int emit_exception_handler(CodeBuffer &cbuf); 1140 static int emit_deopt_handler(CodeBuffer& cbuf); 1141 1142 static uint size_exception_handler() { 1143 // NativeCall instruction size is the same as NativeJump. 1144 // The exception handler starts out as a jump and can be patched to 1145 // a call by deoptimization. (4932387) 1146 // Note that this value is also credited (in output.cpp) to 1147 // the size of the code section. 1148 return NativeJump::instruction_size; 1149 } 1150 1151 #ifdef _LP64 1152 static uint size_deopt_handler() { 1153 // three 5 byte instructions plus one move for unreachable address. 1154 return 15+3; 1155 } 1156 #else 1157 static uint size_deopt_handler() { 1158 // NativeCall instruction size is the same as NativeJump. 1159 // The exception handler starts out as a jump and can be patched to 1160 // a call by deoptimization. (4932387) 1161 // Note that this value is also credited (in output.cpp) to 1162 // the size of the code section. 1163 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1164 } 1165 #endif 1166 }; 1167 1168 %} // end source_hpp 1169 1170 source %{ 1171 1172 #include "opto/addnode.hpp" 1173 1174 // Emit exception handler code. 1175 // Stuff framesize into a register and call a VM stub routine. 1176 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1177 1178 // Note that the code buffer's insts_mark is always relative to insts. 1179 // That's why we must use the macroassembler to generate a handler. 1180 C2_MacroAssembler _masm(&cbuf); 1181 address base = __ start_a_stub(size_exception_handler()); 1182 if (base == NULL) { 1183 ciEnv::current()->record_failure("CodeCache is full"); 1184 return 0; // CodeBuffer::expand failed 1185 } 1186 int offset = __ offset(); 1187 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1188 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1189 __ end_a_stub(); 1190 return offset; 1191 } 1192 1193 // Emit deopt handler code. 1194 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1195 1196 // Note that the code buffer's insts_mark is always relative to insts. 1197 // That's why we must use the macroassembler to generate a handler. 1198 C2_MacroAssembler _masm(&cbuf); 1199 address base = __ start_a_stub(size_deopt_handler()); 1200 if (base == NULL) { 1201 ciEnv::current()->record_failure("CodeCache is full"); 1202 return 0; // CodeBuffer::expand failed 1203 } 1204 int offset = __ offset(); 1205 1206 #ifdef _LP64 1207 address the_pc = (address) __ pc(); 1208 Label next; 1209 // push "the_pc" on the stack without destroying any registers, 1210 // as they all may be live.
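// (The two instructions below are effectively push(the_pc): the CALL pushes the address of the "next" label, and the SUBPTR then rewinds that stack slot by the number of bytes emitted since "the_pc", turning the pushed value into "the_pc" itself.)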
1211 1212 // push address of "next" 1213 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1214 __ bind(next); 1215 // adjust it so it matches "the_pc" 1216 __ subptr(Address(rsp, 0), __ offset() - offset); 1217 #else 1218 InternalAddress here(__ pc()); 1219 __ pushptr(here.addr()); 1220 #endif 1221 1222 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1223 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1224 __ end_a_stub(); 1225 return offset; 1226 } 1227 1228 1229 //============================================================================= 1230 1231 // Float masks come from different places depending on platform. 1232 #ifdef _LP64 1233 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1234 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1235 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1236 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1237 #else 1238 static address float_signmask() { return (address)float_signmask_pool; } 1239 static address float_signflip() { return (address)float_signflip_pool; } 1240 static address double_signmask() { return (address)double_signmask_pool; } 1241 static address double_signflip() { return (address)double_signflip_pool; } 1242 #endif 1243 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1244 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1245 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1246 1247 //============================================================================= 1248 const bool Matcher::match_rule_supported(int opcode) { 1249 if (!has_match_rule(opcode)) { 1250 return false; // no match rule present 1251 } 1252 switch (opcode) { 1253 case Op_AbsVL: 1254 if (UseAVX < 3) { 1255 return false; 1256 } 1257 break; 1258 case Op_PopCountI: 1259 case Op_PopCountL: 1260 if (!UsePopCountInstruction) { 1261 return false; 1262 } 1263 break; 1264 case Op_PopCountVI: 1265 if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) { 1266 return false; 1267 } 1268 break; 1269 case Op_MulVI: 1270 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1271 return false; 1272 } 1273 break; 1274 case Op_MulVL: 1275 case Op_MulReductionVL: 1276 if (VM_Version::supports_avx512dq() == false) { 1277 return false; 1278 } 1279 break; 1280 case Op_AbsVB: 1281 case Op_AbsVS: 1282 case Op_AbsVI: 1283 case Op_AddReductionVI: 1284 case Op_AndReductionV: 1285 case Op_OrReductionV: 1286 case Op_XorReductionV: 1287 if (UseSSE < 3) { // requires at least SSSE3 1288 return false; 1289 } 1290 break; 1291 case Op_MulReductionVI: 1292 if (UseSSE < 4) { // requires at least SSE4 1293 return false; 1294 } 1295 break; 1296 case Op_SqrtVD: 1297 case Op_SqrtVF: 1298 if (UseAVX < 1) { // enabled for AVX only 1299 return false; 1300 } 1301 break; 1302 case Op_CompareAndSwapL: 1303 #ifdef _LP64 1304 case Op_CompareAndSwapP: 1305 #endif 1306 if (!VM_Version::supports_cx8()) { 1307 return false; 1308 } 1309 break; 1310 case Op_CMoveVF: 1311 case Op_CMoveVD: 1312 if (UseAVX < 1 || UseAVX > 2) { 1313 return false; 1314 } 1315 break; 1316 case Op_StrIndexOf: 1317 if (!UseSSE42Intrinsics) { 1318 return false; 1319 } 1320 break; 1321 case Op_StrIndexOfChar: 1322 if (!UseSSE42Intrinsics) { 1323 
return false; 1324 } 1325 break; 1326 case Op_OnSpinWait: 1327 if (VM_Version::supports_on_spin_wait() == false) { 1328 return false; 1329 } 1330 break; 1331 case Op_MulVB: 1332 case Op_LShiftVB: 1333 case Op_RShiftVB: 1334 case Op_URShiftVB: 1335 if (UseSSE < 4) { 1336 return false; 1337 } 1338 break; 1339 #ifdef _LP64 1340 case Op_MaxD: 1341 case Op_MaxF: 1342 case Op_MinD: 1343 case Op_MinF: 1344 if (UseAVX < 1) { // enabled for AVX only 1345 return false; 1346 } 1347 break; 1348 #endif 1349 case Op_CacheWB: 1350 case Op_CacheWBPreSync: 1351 case Op_CacheWBPostSync: 1352 if (!VM_Version::supports_data_cache_line_flush()) { 1353 return false; 1354 } 1355 break; 1356 case Op_RoundDoubleMode: 1357 if (UseSSE < 4) { 1358 return false; 1359 } 1360 break; 1361 case Op_RoundDoubleModeV: 1362 if (VM_Version::supports_avx() == false) { 1363 return false; // 128bit vroundpd is not available 1364 } 1365 break; 1366 #ifndef _LP64 1367 case Op_AddReductionVF: 1368 case Op_AddReductionVD: 1369 case Op_MulReductionVF: 1370 case Op_MulReductionVD: 1371 if (UseSSE < 1) { // requires at least SSE 1372 return false; 1373 } 1374 break; 1375 case Op_MulAddVS2VI: 1376 case Op_RShiftVL: 1377 case Op_AbsVD: 1378 case Op_NegVD: 1379 if (UseSSE < 2) { 1380 return false; 1381 } 1382 break; 1383 #endif // !LP64 1384 } 1385 return true; // Match rules are supported by default. 1386 } 1387 1388 //------------------------------------------------------------------------ 1389 1390 // Identify extra cases where we might want to provide match rules for vector nodes and 1391 // other intrinsics guarded by vector length (vlen) and element type (bt). 1392 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1393 if (!match_rule_supported(opcode)) { 1394 return false; 1395 } 1396 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1397 // * SSE2 supports 128bit vectors for all types; 1398 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1399 // * AVX2 supports 256bit vectors for all types; 1400 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1401 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1402 // There's also a limit on the minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1403 // And MaxVectorSize is taken into account as well. 1404 if (!vector_size_supported(bt, vlen)) { 1405 return false; 1406 } 1407 // Special cases which depend on the vector length follow: 1408 // * implementation limitations 1409 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1410 // * the 128bit vroundpd instruction is present only in AVX1 1411 switch (opcode) { 1412 case Op_AbsVF: 1413 case Op_NegVF: 1414 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1415 return false; // 512bit vandps and vxorps are not available 1416 } 1417 break; 1418 case Op_AbsVD: 1419 case Op_NegVD: 1420 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1421 return false; // 512bit vandpd and vxorpd are not available 1422 } 1423 break; 1424 case Op_CMoveVF: 1425 if (vlen != 8) { 1426 return false; // implementation limitation (only vcmov8F_reg is present) 1427 } 1428 break; 1429 case Op_CMoveVD: 1430 if (vlen != 4) { 1431 return false; // implementation limitation (only vcmov4D_reg is present) 1432 } 1433 break; 1434 } 1435 return true; // Match rules are supported by default.
1436 } 1437 1438 // x86 supports generic vector operands: vec and legVec. 1439 const bool Matcher::supports_generic_vector_operands = true; 1440 1441 MachOper* Matcher::specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 1442 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 1443 bool legacy = (generic_opnd->opcode() == LEGVEC); 1444 if (!VM_Version::supports_avx512vlbwdq() && // KNL 1445 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 1446 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 1447 return new legVecZOper(); 1448 } 1449 if (legacy) { 1450 switch (ideal_reg) { 1451 case Op_VecS: return new legVecSOper(); 1452 case Op_VecD: return new legVecDOper(); 1453 case Op_VecX: return new legVecXOper(); 1454 case Op_VecY: return new legVecYOper(); 1455 case Op_VecZ: return new legVecZOper(); 1456 } 1457 } else { 1458 switch (ideal_reg) { 1459 case Op_VecS: return new vecSOper(); 1460 case Op_VecD: return new vecDOper(); 1461 case Op_VecX: return new vecXOper(); 1462 case Op_VecY: return new vecYOper(); 1463 case Op_VecZ: return new vecZOper(); 1464 } 1465 } 1466 ShouldNotReachHere(); 1467 return NULL; 1468 } 1469 1470 bool Matcher::is_generic_reg2reg_move(MachNode* m) { 1471 switch (m->rule()) { 1472 case MoveVec2Leg_rule: 1473 case MoveLeg2Vec_rule: 1474 return true; 1475 default: 1476 return false; 1477 } 1478 } 1479 1480 bool Matcher::is_generic_vector(MachOper* opnd) { 1481 switch (opnd->opcode()) { 1482 case VEC: 1483 case LEGVEC: 1484 return true; 1485 default: 1486 return false; 1487 } 1488 } 1489 1490 //------------------------------------------------------------------------ 1491 1492 const bool Matcher::has_predicated_vectors(void) { 1493 bool ret_value = false; 1494 if (UseAVX > 2) { 1495 ret_value = VM_Version::supports_avx512vl(); 1496 } 1497 1498 return ret_value; 1499 } 1500 1501 const int Matcher::float_pressure(int default_pressure_threshold) { 1502 int float_pressure_threshold = default_pressure_threshold; 1503 #ifdef _LP64 1504 if (UseAVX > 2) { 1505 // Increase pressure threshold on machines with AVX3, which have 1506 // 2x more XMM registers. 1507 float_pressure_threshold = default_pressure_threshold * 2; 1508 } 1509 #endif 1510 return float_pressure_threshold; 1511 } 1512 1513 // Max vector size in bytes. 0 if not supported. 1514 const int Matcher::vector_width_in_bytes(BasicType bt) { 1515 assert(is_java_primitive(bt), "only primitive type vectors"); 1516 if (UseSSE < 2) return 0; 1517 // SSE2 supports 128bit vectors for all types. 1518 // AVX2 supports 256bit vectors for all types. 1519 // AVX512 (EVEX) supports 512bit vectors for all types. 1520 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1521 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1522 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1523 size = (UseAVX > 2) ? 64 : 32; 1524 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 1525 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 1526 // Use flag to limit vector size. 1527 size = MIN2(size, (int)MaxVectorSize); 1528 // Minimum 2 values in vector (or 4 for bytes).
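// Illustrative example: with UseAVX == 1 and MaxVectorSize == 32, T_FLOAT and T_DOUBLE end up with size == 32 while T_INT stays at 16; the switch below then returns 0 for any result that holds fewer than 2 elements (4 bytes for sub-word types).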
1529 switch (bt) { 1530 case T_DOUBLE: 1531 case T_LONG: 1532 if (size < 16) return 0; 1533 break; 1534 case T_FLOAT: 1535 case T_INT: 1536 if (size < 8) return 0; 1537 break; 1538 case T_BOOLEAN: 1539 if (size < 4) return 0; 1540 break; 1541 case T_CHAR: 1542 if (size < 4) return 0; 1543 break; 1544 case T_BYTE: 1545 if (size < 4) return 0; 1546 break; 1547 case T_SHORT: 1548 if (size < 4) return 0; 1549 break; 1550 default: 1551 ShouldNotReachHere(); 1552 } 1553 return size; 1554 } 1555 1556 // Limits on vector size (number of elements) loaded into a vector. 1557 const int Matcher::max_vector_size(const BasicType bt) { 1558 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1559 } 1560 const int Matcher::min_vector_size(const BasicType bt) { 1561 int max_size = max_vector_size(bt); 1562 // Min size which can be loaded into a vector is 4 bytes. 1563 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1564 return MIN2(size, max_size); 1565 } 1566 1567 // Vector ideal reg corresponding to specified size in bytes. 1568 const uint Matcher::vector_ideal_reg(int size) { 1569 assert(MaxVectorSize >= size, ""); 1570 switch(size) { 1571 case 4: return Op_VecS; 1572 case 8: return Op_VecD; 1573 case 16: return Op_VecX; 1574 case 32: return Op_VecY; 1575 case 64: return Op_VecZ; 1576 } 1577 ShouldNotReachHere(); 1578 return 0; 1579 } 1580 1581 // Only the lowest bits of an xmm reg are used for the vector shift count. 1582 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1583 return Op_VecS; 1584 } 1585 1586 // x86 supports misaligned vector stores/loads. 1587 const bool Matcher::misaligned_vectors_ok() { 1588 return true; 1589 } 1590 1591 // x86 AES instructions are compatible with SunJCE expanded 1592 // keys, hence we do not need to pass the original key to stubs. 1593 const bool Matcher::pass_original_key_for_aes() { 1594 return false; 1595 } 1596 1597 1598 const bool Matcher::convi2l_type_required = true; 1599 1600 // Check for shift by small constant as well. 1601 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1602 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1603 shift->in(2)->get_int() <= 3 && 1604 // Are there other uses besides address expressions? 1605 !matcher->is_visited(shift)) { 1606 address_visited.set(shift->_idx); // Flag as address_visited 1607 mstack.push(shift->in(2), Matcher::Visit); 1608 Node *conv = shift->in(1); 1609 #ifdef _LP64 1610 // Allow the Matcher to match the rule which bypasses 1611 // the ConvI2L operation for an array index on LP64 1612 // if the index value is positive. 1613 if (conv->Opcode() == Op_ConvI2L && 1614 conv->as_Type()->type()->is_long()->_lo >= 0 && 1615 // Are there other uses besides address expressions? 1616 !matcher->is_visited(conv)) { 1617 address_visited.set(conv->_idx); // Flag as address_visited 1618 mstack.push(conv->in(1), Matcher::Pre_Visit); 1619 } else 1620 #endif 1621 mstack.push(conv, Matcher::Pre_Visit); 1622 return true; 1623 } 1624 return false; 1625 } 1626 1627 // Should the Matcher clone shifts on addressing modes, expecting them 1628 // to be subsumed into complex addressing expressions, or compute them 1629 // into registers?
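// (For example, an address expression such as base + (i << 2) + 12 can be cloned at each use, so the matcher folds it into a single [base + i*4 + disp] addressing mode instead of computing the sum into a register.)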
1630 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1631 Node *off = m->in(AddPNode::Offset); 1632 if (off->is_Con()) { 1633 address_visited.test_set(m->_idx); // Flag as address_visited 1634 Node *adr = m->in(AddPNode::Address); 1635 1636 // Intel can handle 2 adds in an addressing mode. 1637 // AtomicAdd is not an addressing expression. 1638 // Cheap to find it by looking for a screwy base. 1639 if (adr->is_AddP() && 1640 !adr->in(AddPNode::Base)->is_top() && 1641 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 1642 // Are there other uses besides address expressions? 1643 !is_visited(adr)) { 1644 address_visited.set(adr->_idx); // Flag as address_visited 1645 Node *shift = adr->in(AddPNode::Offset); 1646 if (!clone_shift(shift, this, mstack, address_visited)) { 1647 mstack.push(shift, Pre_Visit); 1648 } 1649 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1650 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1651 } else { 1652 mstack.push(adr, Pre_Visit); 1653 } 1654 1655 // Clone X+offset as it also folds into most addressing expressions. 1656 mstack.push(off, Visit); 1657 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1658 return true; 1659 } else if (clone_shift(off, this, mstack, address_visited)) { 1660 address_visited.test_set(m->_idx); // Flag as address_visited 1661 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1662 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1663 return true; 1664 } 1665 return false; 1666 } 1667 1668 void Compile::reshape_address(AddPNode* addp) { 1669 } 1670 1671 static inline uint vector_length(const MachNode* n) { 1672 const TypeVect* vt = n->bottom_type()->is_vect(); 1673 return vt->length(); 1674 } 1675 1676 static inline uint vector_length(const MachNode* use, MachOper* opnd) { 1677 uint def_idx = use->operand_index(opnd); 1678 Node* def = use->in(def_idx); 1679 return def->bottom_type()->is_vect()->length(); 1680 } 1681 1682 static inline uint vector_length_in_bytes(const MachNode* n) { 1683 const TypeVect* vt = n->bottom_type()->is_vect(); 1684 return vt->length_in_bytes(); 1685 } 1686 1687 static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { 1688 uint def_idx = use->operand_index(opnd); 1689 Node* def = use->in(def_idx); 1690 return def->bottom_type()->is_vect()->length_in_bytes(); 1691 } 1692 1693 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) { 1694 switch(vector_length_in_bytes(n)) { 1695 case 4: // fall-through 1696 case 8: // fall-through 1697 case 16: return Assembler::AVX_128bit; 1698 case 32: return Assembler::AVX_256bit; 1699 case 64: return Assembler::AVX_512bit; 1700 1701 default: { 1702 ShouldNotReachHere(); 1703 return Assembler::AVX_NoVec; 1704 } 1705 } 1706 } 1707 1708 // Helper methods for MachSpillCopyNode::implementation(). 1709 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1710 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1711 // In the 64-bit VM size calculation is very complex, so instructions 1712 // are emitted into a scratch buffer to determine their size.
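// (do_size is only ever true in the 32-bit VM; in the 64-bit VM the assert below enforces do_size == false and the size is taken from the bytes actually emitted.)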
1713 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1714 assert(ireg == Op_VecS || // 32bit vector 1715 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1716 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1717 "no non-adjacent vector moves" ); 1718 if (cbuf) { 1719 C2_MacroAssembler _masm(cbuf); 1720 int offset = __ offset(); 1721 switch (ireg) { 1722 case Op_VecS: // copy whole register 1723 case Op_VecD: 1724 case Op_VecX: 1725 #ifndef _LP64 1726 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1727 #else 1728 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1729 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1730 } else { 1731 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1732 } 1733 #endif 1734 break; 1735 case Op_VecY: 1736 #ifndef _LP64 1737 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1738 #else 1739 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1740 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1741 } else { 1742 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1743 } 1744 #endif 1745 break; 1746 case Op_VecZ: 1747 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1748 break; 1749 default: 1750 ShouldNotReachHere(); 1751 } 1752 int size = __ offset() - offset; 1753 #ifdef ASSERT 1754 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1755 assert(!do_size || size == 4, "incorrect size calculation"); 1756 #endif 1757 return size; 1758 #ifndef PRODUCT 1759 } else if (!do_size) { 1760 switch (ireg) { 1761 case Op_VecS: 1762 case Op_VecD: 1763 case Op_VecX: 1764 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1765 break; 1766 case Op_VecY: 1767 case Op_VecZ: 1768 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1769 break; 1770 default: 1771 ShouldNotReachHere(); 1772 } 1773 #endif 1774 } 1775 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1776 return (UseAVX > 2) ? 6 : 4; 1777 } 1778 1779 int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1780 int stack_offset, int reg, uint ireg, outputStream* st) { 1781 // In the 64-bit VM size calculation is very complex, so instructions 1782 // are emitted into a scratch buffer to determine their size.
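// (Same approach as vec_mov_helper above: is_load selects load-from-stack versus store-to-stack, and stack_offset is the rsp-relative position of the spill slot.)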
1783 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1784 if (cbuf) { 1785 C2_MacroAssembler _masm(cbuf); 1786 int offset = __ offset(); 1787 if (is_load) { 1788 switch (ireg) { 1789 case Op_VecS: 1790 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1791 break; 1792 case Op_VecD: 1793 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1794 break; 1795 case Op_VecX: 1796 #ifndef _LP64 1797 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1798 #else 1799 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1800 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1801 } else { 1802 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1803 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1804 } 1805 #endif 1806 break; 1807 case Op_VecY: 1808 #ifndef _LP64 1809 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1810 #else 1811 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1812 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1813 } else { 1814 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1815 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1816 } 1817 #endif 1818 break; 1819 case Op_VecZ: 1820 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1821 break; 1822 default: 1823 ShouldNotReachHere(); 1824 } 1825 } else { // store 1826 switch (ireg) { 1827 case Op_VecS: 1828 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1829 break; 1830 case Op_VecD: 1831 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1832 break; 1833 case Op_VecX: 1834 #ifndef _LP64 1835 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1836 #else 1837 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1838 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1839 } 1840 else { 1841 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1842 } 1843 #endif 1844 break; 1845 case Op_VecY: 1846 #ifndef _LP64 1847 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1848 #else 1849 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1850 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1851 } 1852 else { 1853 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1854 } 1855 #endif 1856 break; 1857 case Op_VecZ: 1858 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1859 break; 1860 default: 1861 ShouldNotReachHere(); 1862 } 1863 } 1864 int size = __ offset() - offset; 1865 #ifdef ASSERT 1866 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1867 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
1868 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1869 #endif 1870 return size; 1871 #ifndef PRODUCT 1872 } else if (!do_size) { 1873 if (is_load) { 1874 switch (ireg) { 1875 case Op_VecS: 1876 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1877 break; 1878 case Op_VecD: 1879 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1880 break; 1881 case Op_VecX: 1882 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1883 break; 1884 case Op_VecY: 1885 case Op_VecZ: 1886 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1887 break; 1888 default: 1889 ShouldNotReachHere(); 1890 } 1891 } else { // store 1892 switch (ireg) { 1893 case Op_VecS: 1894 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1895 break; 1896 case Op_VecD: 1897 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1898 break; 1899 case Op_VecX: 1900 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1901 break; 1902 case Op_VecY: 1903 case Op_VecZ: 1904 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1905 break; 1906 default: 1907 ShouldNotReachHere(); 1908 } 1909 } 1910 #endif 1911 } 1912 bool is_single_byte = false; 1913 int vec_len = 0; 1914 if ((UseAVX > 2) && (stack_offset != 0)) { 1915 int tuple_type = Assembler::EVEX_FVM; 1916 int input_size = Assembler::EVEX_32bit; 1917 switch (ireg) { 1918 case Op_VecS: 1919 tuple_type = Assembler::EVEX_T1S; 1920 break; 1921 case Op_VecD: 1922 tuple_type = Assembler::EVEX_T1S; 1923 input_size = Assembler::EVEX_64bit; 1924 break; 1925 case Op_VecX: 1926 break; 1927 case Op_VecY: 1928 vec_len = 1; 1929 break; 1930 case Op_VecZ: 1931 vec_len = 2; 1932 break; 1933 } 1934 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 1935 } 1936 int offset_size = 0; 1937 int size = 5; 1938 if (UseAVX > 2) { 1939 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 1940 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1941 size += 2; // Need an additional two bytes for EVEX encoding 1942 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 1943 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1944 } else { 1945 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1946 size += 2; // Need an additional two bytes for EVEX encoding 1947 } 1948 } else { 1949 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1950 } 1951 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1952 return size + offset_size; 1953 } 1954 1955 static inline jint replicate4_imm(int con, int width) { 1956 // Load a constant of "width" (in bytes) and replicate it to fill 32 bits. 1957 assert(width == 1 || width == 2, "only byte or short types here"); 1958 int bit_width = width * 8; 1959 jint val = con; 1960 val &= (1 << bit_width) - 1; // mask off sign bits 1961 while (bit_width < 32) { 1962 val |= (val << bit_width); 1963 bit_width <<= 1; 1964 } 1965 return val; 1966 } 1967 1968 static inline jlong replicate8_imm(int con, int width) { 1969 // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
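// For example (illustrative values): replicate8_imm(0x12, 1) yields 0x1212121212121212, and replicate8_imm(-1, 2) yields all ones, since the initial mask keeps only the low 16 bits before the doubling loop widens them; replicate4_imm above works the same way but stops at 32 bits.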
1970 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1971 int bit_width = width * 8; 1972 jlong val = con; 1973 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1974 while(bit_width < 64) { 1975 val |= (val << bit_width); 1976 bit_width <<= 1; 1977 } 1978 return val; 1979 } 1980 1981 #ifndef PRODUCT 1982 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1983 st->print("nop \t# %d bytes pad for loops and calls", _count); 1984 } 1985 #endif 1986 1987 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1988 C2_MacroAssembler _masm(&cbuf); 1989 __ nop(_count); 1990 } 1991 1992 uint MachNopNode::size(PhaseRegAlloc*) const { 1993 return _count; 1994 } 1995 1996 #ifndef PRODUCT 1997 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1998 st->print("# breakpoint"); 1999 } 2000 #endif 2001 2002 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2003 C2_MacroAssembler _masm(&cbuf); 2004 __ int3(); 2005 } 2006 2007 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2008 return MachNode::size(ra_); 2009 } 2010 2011 %} 2012 2013 encode %{ 2014 2015 enc_class call_epilog %{ 2016 if (VerifyStackAtCalls) { 2017 // Check that stack depth is unchanged: find majik cookie on stack 2018 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2019 C2_MacroAssembler _masm(&cbuf); 2020 Label L; 2021 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2022 __ jccb(Assembler::equal, L); 2023 // Die if stack mismatch 2024 __ int3(); 2025 __ bind(L); 2026 } 2027 %} 2028 2029 %} 2030 2031 2032 //----------OPERANDS----------------------------------------------------------- 2033 // Operand definitions must precede instruction definitions for correct parsing 2034 // in the ADLC because operands constitute user defined types which are used in 2035 // instruction definitions. 2036 2037 // Vectors 2038 2039 // Dummy generic vector class. Should be used for all vector operands. 2040 // Replaced with vec[SDXYZ] during post-selection pass. 2041 operand vec() %{ 2042 constraint(ALLOC_IN_RC(dynamic)); 2043 match(VecX); 2044 match(VecY); 2045 match(VecZ); 2046 match(VecS); 2047 match(VecD); 2048 2049 format %{ %} 2050 interface(REG_INTER); 2051 %} 2052 2053 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2054 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2055 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2056 // runtime code generation via reg_class_dynamic. 2057 operand legVec() %{ 2058 constraint(ALLOC_IN_RC(dynamic)); 2059 match(VecX); 2060 match(VecY); 2061 match(VecZ); 2062 match(VecS); 2063 match(VecD); 2064 2065 format %{ %} 2066 interface(REG_INTER); 2067 %} 2068 2069 // Replaces vec during post-selection cleanup. See above. 2070 operand vecS() %{ 2071 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2072 match(VecS); 2073 2074 format %{ %} 2075 interface(REG_INTER); 2076 %} 2077 2078 // Replaces legVec during post-selection cleanup. See above. 2079 operand legVecS() %{ 2080 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2081 match(VecS); 2082 2083 format %{ %} 2084 interface(REG_INTER); 2085 %} 2086 2087 // Replaces vec during post-selection cleanup. See above. 
2088 operand vecD() %{ 2089 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2090 match(VecD); 2091 2092 format %{ %} 2093 interface(REG_INTER); 2094 %} 2095 2096 // Replaces legVec during post-selection cleanup. See above. 2097 operand legVecD() %{ 2098 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2099 match(VecD); 2100 2101 format %{ %} 2102 interface(REG_INTER); 2103 %} 2104 2105 // Replaces vec during post-selection cleanup. See above. 2106 operand vecX() %{ 2107 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2108 match(VecX); 2109 2110 format %{ %} 2111 interface(REG_INTER); 2112 %} 2113 2114 // Replaces legVec during post-selection cleanup. See above. 2115 operand legVecX() %{ 2116 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2117 match(VecX); 2118 2119 format %{ %} 2120 interface(REG_INTER); 2121 %} 2122 2123 // Replaces vec during post-selection cleanup. See above. 2124 operand vecY() %{ 2125 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2126 match(VecY); 2127 2128 format %{ %} 2129 interface(REG_INTER); 2130 %} 2131 2132 // Replaces legVec during post-selection cleanup. See above. 2133 operand legVecY() %{ 2134 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2135 match(VecY); 2136 2137 format %{ %} 2138 interface(REG_INTER); 2139 %} 2140 2141 // Replaces vec during post-selection cleanup. See above. 2142 operand vecZ() %{ 2143 constraint(ALLOC_IN_RC(vectorz_reg)); 2144 match(VecZ); 2145 2146 format %{ %} 2147 interface(REG_INTER); 2148 %} 2149 2150 // Replaces legVec during post-selection cleanup. See above. 2151 operand legVecZ() %{ 2152 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2153 match(VecZ); 2154 2155 format %{ %} 2156 interface(REG_INTER); 2157 %} 2158 2159 // Comparison Code for FP conditional move 2160 operand cmpOp_vcmppd() %{ 2161 match(Bool); 2162 2163 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2164 n->as_Bool()->_test._test != BoolTest::no_overflow); 2165 format %{ "" %} 2166 interface(COND_INTER) %{ 2167 equal (0x0, "eq"); 2168 less (0x1, "lt"); 2169 less_equal (0x2, "le"); 2170 not_equal (0xC, "ne"); 2171 greater_equal(0xD, "ge"); 2172 greater (0xE, "gt"); 2173 //TODO cannot compile (adlc breaks) without the next two lines, with error: 2174 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2175 // equal' for overflow.
2176 overflow (0x20, "o"); // not really supported by the instruction 2177 no_overflow (0x21, "no"); // not really supported by the instruction 2178 %} 2179 %} 2180 2181 2182 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2183 2184 // ============================================================================ 2185 2186 instruct ShouldNotReachHere() %{ 2187 match(Halt); 2188 format %{ "ud2\t# ShouldNotReachHere" %} 2189 ins_encode %{ 2190 __ stop(_halt_reason); 2191 %} 2192 ins_pipe(pipe_slow); 2193 %} 2194 2195 // =================================EVEX special=============================== 2196 2197 instruct setMask(rRegI dst, rRegI src) %{ 2198 predicate(Matcher::has_predicated_vectors()); 2199 match(Set dst (SetVectMaskI src)); 2200 effect(TEMP dst); 2201 format %{ "setvectmask $dst, $src" %} 2202 ins_encode %{ 2203 __ setvectmask($dst$$Register, $src$$Register); 2204 %} 2205 ins_pipe(pipe_slow); 2206 %} 2207 2208 // ============================================================================ 2209 2210 instruct addF_reg(regF dst, regF src) %{ 2211 predicate((UseSSE>=1) && (UseAVX == 0)); 2212 match(Set dst (AddF dst src)); 2213 2214 format %{ "addss $dst, $src" %} 2215 ins_cost(150); 2216 ins_encode %{ 2217 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2218 %} 2219 ins_pipe(pipe_slow); 2220 %} 2221 2222 instruct addF_mem(regF dst, memory src) %{ 2223 predicate((UseSSE>=1) && (UseAVX == 0)); 2224 match(Set dst (AddF dst (LoadF src))); 2225 2226 format %{ "addss $dst, $src" %} 2227 ins_cost(150); 2228 ins_encode %{ 2229 __ addss($dst$$XMMRegister, $src$$Address); 2230 %} 2231 ins_pipe(pipe_slow); 2232 %} 2233 2234 instruct addF_imm(regF dst, immF con) %{ 2235 predicate((UseSSE>=1) && (UseAVX == 0)); 2236 match(Set dst (AddF dst con)); 2237 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2238 ins_cost(150); 2239 ins_encode %{ 2240 __ addss($dst$$XMMRegister, $constantaddress($con)); 2241 %} 2242 ins_pipe(pipe_slow); 2243 %} 2244 2245 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2246 predicate(UseAVX > 0); 2247 match(Set dst (AddF src1 src2)); 2248 2249 format %{ "vaddss $dst, $src1, $src2" %} 2250 ins_cost(150); 2251 ins_encode %{ 2252 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2253 %} 2254 ins_pipe(pipe_slow); 2255 %} 2256 2257 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2258 predicate(UseAVX > 0); 2259 match(Set dst (AddF src1 (LoadF src2))); 2260 2261 format %{ "vaddss $dst, $src1, $src2" %} 2262 ins_cost(150); 2263 ins_encode %{ 2264 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2265 %} 2266 ins_pipe(pipe_slow); 2267 %} 2268 2269 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2270 predicate(UseAVX > 0); 2271 match(Set dst (AddF src con)); 2272 2273 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2274 ins_cost(150); 2275 ins_encode %{ 2276 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2277 %} 2278 ins_pipe(pipe_slow); 2279 %} 2280 2281 instruct addD_reg(regD dst, regD src) %{ 2282 predicate((UseSSE>=2) && (UseAVX == 0)); 2283 match(Set dst (AddD dst src)); 2284 2285 format %{ "addsd $dst, $src" %} 2286 ins_cost(150); 2287 ins_encode %{ 2288 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2289 %} 2290 ins_pipe(pipe_slow); 2291 %} 2292 2293 instruct addD_mem(regD dst, memory src) %{ 2294 predicate((UseSSE>=2) && (UseAVX == 0)); 2295 match(Set dst (AddD dst (LoadD 
src))); 2296 2297 format %{ "addsd $dst, $src" %} 2298 ins_cost(150); 2299 ins_encode %{ 2300 __ addsd($dst$$XMMRegister, $src$$Address); 2301 %} 2302 ins_pipe(pipe_slow); 2303 %} 2304 2305 instruct addD_imm(regD dst, immD con) %{ 2306 predicate((UseSSE>=2) && (UseAVX == 0)); 2307 match(Set dst (AddD dst con)); 2308 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2309 ins_cost(150); 2310 ins_encode %{ 2311 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2312 %} 2313 ins_pipe(pipe_slow); 2314 %} 2315 2316 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2317 predicate(UseAVX > 0); 2318 match(Set dst (AddD src1 src2)); 2319 2320 format %{ "vaddsd $dst, $src1, $src2" %} 2321 ins_cost(150); 2322 ins_encode %{ 2323 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2324 %} 2325 ins_pipe(pipe_slow); 2326 %} 2327 2328 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2329 predicate(UseAVX > 0); 2330 match(Set dst (AddD src1 (LoadD src2))); 2331 2332 format %{ "vaddsd $dst, $src1, $src2" %} 2333 ins_cost(150); 2334 ins_encode %{ 2335 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2336 %} 2337 ins_pipe(pipe_slow); 2338 %} 2339 2340 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2341 predicate(UseAVX > 0); 2342 match(Set dst (AddD src con)); 2343 2344 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2345 ins_cost(150); 2346 ins_encode %{ 2347 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2348 %} 2349 ins_pipe(pipe_slow); 2350 %} 2351 2352 instruct subF_reg(regF dst, regF src) %{ 2353 predicate((UseSSE>=1) && (UseAVX == 0)); 2354 match(Set dst (SubF dst src)); 2355 2356 format %{ "subss $dst, $src" %} 2357 ins_cost(150); 2358 ins_encode %{ 2359 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2360 %} 2361 ins_pipe(pipe_slow); 2362 %} 2363 2364 instruct subF_mem(regF dst, memory src) %{ 2365 predicate((UseSSE>=1) && (UseAVX == 0)); 2366 match(Set dst (SubF dst (LoadF src))); 2367 2368 format %{ "subss $dst, $src" %} 2369 ins_cost(150); 2370 ins_encode %{ 2371 __ subss($dst$$XMMRegister, $src$$Address); 2372 %} 2373 ins_pipe(pipe_slow); 2374 %} 2375 2376 instruct subF_imm(regF dst, immF con) %{ 2377 predicate((UseSSE>=1) && (UseAVX == 0)); 2378 match(Set dst (SubF dst con)); 2379 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2380 ins_cost(150); 2381 ins_encode %{ 2382 __ subss($dst$$XMMRegister, $constantaddress($con)); 2383 %} 2384 ins_pipe(pipe_slow); 2385 %} 2386 2387 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2388 predicate(UseAVX > 0); 2389 match(Set dst (SubF src1 src2)); 2390 2391 format %{ "vsubss $dst, $src1, $src2" %} 2392 ins_cost(150); 2393 ins_encode %{ 2394 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2395 %} 2396 ins_pipe(pipe_slow); 2397 %} 2398 2399 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2400 predicate(UseAVX > 0); 2401 match(Set dst (SubF src1 (LoadF src2))); 2402 2403 format %{ "vsubss $dst, $src1, $src2" %} 2404 ins_cost(150); 2405 ins_encode %{ 2406 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2407 %} 2408 ins_pipe(pipe_slow); 2409 %} 2410 2411 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2412 predicate(UseAVX > 0); 2413 match(Set dst (SubF src con)); 2414 2415 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2416 ins_cost(150); 2417 
ins_encode %{ 2418 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2419 %} 2420 ins_pipe(pipe_slow); 2421 %} 2422 2423 instruct subD_reg(regD dst, regD src) %{ 2424 predicate((UseSSE>=2) && (UseAVX == 0)); 2425 match(Set dst (SubD dst src)); 2426 2427 format %{ "subsd $dst, $src" %} 2428 ins_cost(150); 2429 ins_encode %{ 2430 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2431 %} 2432 ins_pipe(pipe_slow); 2433 %} 2434 2435 instruct subD_mem(regD dst, memory src) %{ 2436 predicate((UseSSE>=2) && (UseAVX == 0)); 2437 match(Set dst (SubD dst (LoadD src))); 2438 2439 format %{ "subsd $dst, $src" %} 2440 ins_cost(150); 2441 ins_encode %{ 2442 __ subsd($dst$$XMMRegister, $src$$Address); 2443 %} 2444 ins_pipe(pipe_slow); 2445 %} 2446 2447 instruct subD_imm(regD dst, immD con) %{ 2448 predicate((UseSSE>=2) && (UseAVX == 0)); 2449 match(Set dst (SubD dst con)); 2450 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2451 ins_cost(150); 2452 ins_encode %{ 2453 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2454 %} 2455 ins_pipe(pipe_slow); 2456 %} 2457 2458 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2459 predicate(UseAVX > 0); 2460 match(Set dst (SubD src1 src2)); 2461 2462 format %{ "vsubsd $dst, $src1, $src2" %} 2463 ins_cost(150); 2464 ins_encode %{ 2465 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2466 %} 2467 ins_pipe(pipe_slow); 2468 %} 2469 2470 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2471 predicate(UseAVX > 0); 2472 match(Set dst (SubD src1 (LoadD src2))); 2473 2474 format %{ "vsubsd $dst, $src1, $src2" %} 2475 ins_cost(150); 2476 ins_encode %{ 2477 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2478 %} 2479 ins_pipe(pipe_slow); 2480 %} 2481 2482 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2483 predicate(UseAVX > 0); 2484 match(Set dst (SubD src con)); 2485 2486 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2487 ins_cost(150); 2488 ins_encode %{ 2489 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2490 %} 2491 ins_pipe(pipe_slow); 2492 %} 2493 2494 instruct mulF_reg(regF dst, regF src) %{ 2495 predicate((UseSSE>=1) && (UseAVX == 0)); 2496 match(Set dst (MulF dst src)); 2497 2498 format %{ "mulss $dst, $src" %} 2499 ins_cost(150); 2500 ins_encode %{ 2501 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2502 %} 2503 ins_pipe(pipe_slow); 2504 %} 2505 2506 instruct mulF_mem(regF dst, memory src) %{ 2507 predicate((UseSSE>=1) && (UseAVX == 0)); 2508 match(Set dst (MulF dst (LoadF src))); 2509 2510 format %{ "mulss $dst, $src" %} 2511 ins_cost(150); 2512 ins_encode %{ 2513 __ mulss($dst$$XMMRegister, $src$$Address); 2514 %} 2515 ins_pipe(pipe_slow); 2516 %} 2517 2518 instruct mulF_imm(regF dst, immF con) %{ 2519 predicate((UseSSE>=1) && (UseAVX == 0)); 2520 match(Set dst (MulF dst con)); 2521 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2522 ins_cost(150); 2523 ins_encode %{ 2524 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2525 %} 2526 ins_pipe(pipe_slow); 2527 %} 2528 2529 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2530 predicate(UseAVX > 0); 2531 match(Set dst (MulF src1 src2)); 2532 2533 format %{ "vmulss $dst, $src1, $src2" %} 2534 ins_cost(150); 2535 ins_encode %{ 2536 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2537 %} 2538 ins_pipe(pipe_slow); 2539 %} 2540 2541 instruct 
mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2542 predicate(UseAVX > 0); 2543 match(Set dst (MulF src1 (LoadF src2))); 2544 2545 format %{ "vmulss $dst, $src1, $src2" %} 2546 ins_cost(150); 2547 ins_encode %{ 2548 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2549 %} 2550 ins_pipe(pipe_slow); 2551 %} 2552 2553 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2554 predicate(UseAVX > 0); 2555 match(Set dst (MulF src con)); 2556 2557 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2558 ins_cost(150); 2559 ins_encode %{ 2560 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2561 %} 2562 ins_pipe(pipe_slow); 2563 %} 2564 2565 instruct mulD_reg(regD dst, regD src) %{ 2566 predicate((UseSSE>=2) && (UseAVX == 0)); 2567 match(Set dst (MulD dst src)); 2568 2569 format %{ "mulsd $dst, $src" %} 2570 ins_cost(150); 2571 ins_encode %{ 2572 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2573 %} 2574 ins_pipe(pipe_slow); 2575 %} 2576 2577 instruct mulD_mem(regD dst, memory src) %{ 2578 predicate((UseSSE>=2) && (UseAVX == 0)); 2579 match(Set dst (MulD dst (LoadD src))); 2580 2581 format %{ "mulsd $dst, $src" %} 2582 ins_cost(150); 2583 ins_encode %{ 2584 __ mulsd($dst$$XMMRegister, $src$$Address); 2585 %} 2586 ins_pipe(pipe_slow); 2587 %} 2588 2589 instruct mulD_imm(regD dst, immD con) %{ 2590 predicate((UseSSE>=2) && (UseAVX == 0)); 2591 match(Set dst (MulD dst con)); 2592 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2593 ins_cost(150); 2594 ins_encode %{ 2595 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2596 %} 2597 ins_pipe(pipe_slow); 2598 %} 2599 2600 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2601 predicate(UseAVX > 0); 2602 match(Set dst (MulD src1 src2)); 2603 2604 format %{ "vmulsd $dst, $src1, $src2" %} 2605 ins_cost(150); 2606 ins_encode %{ 2607 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2608 %} 2609 ins_pipe(pipe_slow); 2610 %} 2611 2612 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2613 predicate(UseAVX > 0); 2614 match(Set dst (MulD src1 (LoadD src2))); 2615 2616 format %{ "vmulsd $dst, $src1, $src2" %} 2617 ins_cost(150); 2618 ins_encode %{ 2619 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2620 %} 2621 ins_pipe(pipe_slow); 2622 %} 2623 2624 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2625 predicate(UseAVX > 0); 2626 match(Set dst (MulD src con)); 2627 2628 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2629 ins_cost(150); 2630 ins_encode %{ 2631 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2632 %} 2633 ins_pipe(pipe_slow); 2634 %} 2635 2636 instruct divF_reg(regF dst, regF src) %{ 2637 predicate((UseSSE>=1) && (UseAVX == 0)); 2638 match(Set dst (DivF dst src)); 2639 2640 format %{ "divss $dst, $src" %} 2641 ins_cost(150); 2642 ins_encode %{ 2643 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2644 %} 2645 ins_pipe(pipe_slow); 2646 %} 2647 2648 instruct divF_mem(regF dst, memory src) %{ 2649 predicate((UseSSE>=1) && (UseAVX == 0)); 2650 match(Set dst (DivF dst (LoadF src))); 2651 2652 format %{ "divss $dst, $src" %} 2653 ins_cost(150); 2654 ins_encode %{ 2655 __ divss($dst$$XMMRegister, $src$$Address); 2656 %} 2657 ins_pipe(pipe_slow); 2658 %} 2659 2660 instruct divF_imm(regF dst, immF con) %{ 2661 predicate((UseSSE>=1) && (UseAVX == 0)); 2662 match(Set dst (DivF dst con)); 2663 format 
%{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2664 ins_cost(150); 2665 ins_encode %{ 2666 __ divss($dst$$XMMRegister, $constantaddress($con)); 2667 %} 2668 ins_pipe(pipe_slow); 2669 %} 2670 2671 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2672 predicate(UseAVX > 0); 2673 match(Set dst (DivF src1 src2)); 2674 2675 format %{ "vdivss $dst, $src1, $src2" %} 2676 ins_cost(150); 2677 ins_encode %{ 2678 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2679 %} 2680 ins_pipe(pipe_slow); 2681 %} 2682 2683 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2684 predicate(UseAVX > 0); 2685 match(Set dst (DivF src1 (LoadF src2))); 2686 2687 format %{ "vdivss $dst, $src1, $src2" %} 2688 ins_cost(150); 2689 ins_encode %{ 2690 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2691 %} 2692 ins_pipe(pipe_slow); 2693 %} 2694 2695 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2696 predicate(UseAVX > 0); 2697 match(Set dst (DivF src con)); 2698 2699 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2700 ins_cost(150); 2701 ins_encode %{ 2702 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2703 %} 2704 ins_pipe(pipe_slow); 2705 %} 2706 2707 instruct divD_reg(regD dst, regD src) %{ 2708 predicate((UseSSE>=2) && (UseAVX == 0)); 2709 match(Set dst (DivD dst src)); 2710 2711 format %{ "divsd $dst, $src" %} 2712 ins_cost(150); 2713 ins_encode %{ 2714 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2715 %} 2716 ins_pipe(pipe_slow); 2717 %} 2718 2719 instruct divD_mem(regD dst, memory src) %{ 2720 predicate((UseSSE>=2) && (UseAVX == 0)); 2721 match(Set dst (DivD dst (LoadD src))); 2722 2723 format %{ "divsd $dst, $src" %} 2724 ins_cost(150); 2725 ins_encode %{ 2726 __ divsd($dst$$XMMRegister, $src$$Address); 2727 %} 2728 ins_pipe(pipe_slow); 2729 %} 2730 2731 instruct divD_imm(regD dst, immD con) %{ 2732 predicate((UseSSE>=2) && (UseAVX == 0)); 2733 match(Set dst (DivD dst con)); 2734 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2735 ins_cost(150); 2736 ins_encode %{ 2737 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2738 %} 2739 ins_pipe(pipe_slow); 2740 %} 2741 2742 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2743 predicate(UseAVX > 0); 2744 match(Set dst (DivD src1 src2)); 2745 2746 format %{ "vdivsd $dst, $src1, $src2" %} 2747 ins_cost(150); 2748 ins_encode %{ 2749 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2750 %} 2751 ins_pipe(pipe_slow); 2752 %} 2753 2754 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2755 predicate(UseAVX > 0); 2756 match(Set dst (DivD src1 (LoadD src2))); 2757 2758 format %{ "vdivsd $dst, $src1, $src2" %} 2759 ins_cost(150); 2760 ins_encode %{ 2761 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2762 %} 2763 ins_pipe(pipe_slow); 2764 %} 2765 2766 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2767 predicate(UseAVX > 0); 2768 match(Set dst (DivD src con)); 2769 2770 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2771 ins_cost(150); 2772 ins_encode %{ 2773 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2774 %} 2775 ins_pipe(pipe_slow); 2776 %} 2777 2778 instruct absF_reg(regF dst) %{ 2779 predicate((UseSSE>=1) && (UseAVX == 0)); 2780 match(Set dst (AbsF dst)); 2781 ins_cost(150); 2782 format %{ "andps $dst, [0x7fffffff]\t# abs float 
by sign masking" %} 2783 ins_encode %{ 2784 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2785 %} 2786 ins_pipe(pipe_slow); 2787 %} 2788 2789 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2790 predicate(UseAVX > 0); 2791 match(Set dst (AbsF src)); 2792 ins_cost(150); 2793 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2794 ins_encode %{ 2795 int vector_len = 0; 2796 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2797 ExternalAddress(float_signmask()), vector_len); 2798 %} 2799 ins_pipe(pipe_slow); 2800 %} 2801 2802 instruct absD_reg(regD dst) %{ 2803 predicate((UseSSE>=2) && (UseAVX == 0)); 2804 match(Set dst (AbsD dst)); 2805 ins_cost(150); 2806 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2807 "# abs double by sign masking" %} 2808 ins_encode %{ 2809 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2810 %} 2811 ins_pipe(pipe_slow); 2812 %} 2813 2814 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2815 predicate(UseAVX > 0); 2816 match(Set dst (AbsD src)); 2817 ins_cost(150); 2818 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2819 "# abs double by sign masking" %} 2820 ins_encode %{ 2821 int vector_len = 0; 2822 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2823 ExternalAddress(double_signmask()), vector_len); 2824 %} 2825 ins_pipe(pipe_slow); 2826 %} 2827 2828 instruct negF_reg(regF dst) %{ 2829 predicate((UseSSE>=1) && (UseAVX == 0)); 2830 match(Set dst (NegF dst)); 2831 ins_cost(150); 2832 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2833 ins_encode %{ 2834 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2835 %} 2836 ins_pipe(pipe_slow); 2837 %} 2838 2839 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2840 predicate(UseAVX > 0); 2841 match(Set dst (NegF src)); 2842 ins_cost(150); 2843 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2844 ins_encode %{ 2845 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2846 ExternalAddress(float_signflip())); 2847 %} 2848 ins_pipe(pipe_slow); 2849 %} 2850 2851 instruct negD_reg(regD dst) %{ 2852 predicate((UseSSE>=2) && (UseAVX == 0)); 2853 match(Set dst (NegD dst)); 2854 ins_cost(150); 2855 format %{ "xorpd $dst, [0x8000000000000000]\t" 2856 "# neg double by sign flipping" %} 2857 ins_encode %{ 2858 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2859 %} 2860 ins_pipe(pipe_slow); 2861 %} 2862 2863 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 2864 predicate(UseAVX > 0); 2865 match(Set dst (NegD src)); 2866 ins_cost(150); 2867 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 2868 "# neg double by sign flipping" %} 2869 ins_encode %{ 2870 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2871 ExternalAddress(double_signflip())); 2872 %} 2873 ins_pipe(pipe_slow); 2874 %} 2875 2876 instruct sqrtF_reg(regF dst, regF src) %{ 2877 predicate(UseSSE>=1); 2878 match(Set dst (SqrtF src)); 2879 2880 format %{ "sqrtss $dst, $src" %} 2881 ins_cost(150); 2882 ins_encode %{ 2883 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2884 %} 2885 ins_pipe(pipe_slow); 2886 %} 2887 2888 instruct sqrtF_mem(regF dst, memory src) %{ 2889 predicate(UseSSE>=1); 2890 match(Set dst (SqrtF (LoadF src))); 2891 2892 format %{ "sqrtss $dst, $src" %} 2893 ins_cost(150); 2894 ins_encode %{ 2895 __ sqrtss($dst$$XMMRegister, $src$$Address); 2896 %} 2897 ins_pipe(pipe_slow); 2898 %} 2899 2900 instruct sqrtF_imm(regF dst, immF con) %{ 2901 predicate(UseSSE>=1); 2902 match(Set dst (SqrtF con)); 2903 2904 
format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2905 ins_cost(150); 2906 ins_encode %{ 2907 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2908 %} 2909 ins_pipe(pipe_slow); 2910 %} 2911 2912 instruct sqrtD_reg(regD dst, regD src) %{ 2913 predicate(UseSSE>=2); 2914 match(Set dst (SqrtD src)); 2915 2916 format %{ "sqrtsd $dst, $src" %} 2917 ins_cost(150); 2918 ins_encode %{ 2919 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2920 %} 2921 ins_pipe(pipe_slow); 2922 %} 2923 2924 instruct sqrtD_mem(regD dst, memory src) %{ 2925 predicate(UseSSE>=2); 2926 match(Set dst (SqrtD (LoadD src))); 2927 2928 format %{ "sqrtsd $dst, $src" %} 2929 ins_cost(150); 2930 ins_encode %{ 2931 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2932 %} 2933 ins_pipe(pipe_slow); 2934 %} 2935 2936 instruct sqrtD_imm(regD dst, immD con) %{ 2937 predicate(UseSSE>=2); 2938 match(Set dst (SqrtD con)); 2939 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2940 ins_cost(150); 2941 ins_encode %{ 2942 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2943 %} 2944 ins_pipe(pipe_slow); 2945 %} 2946 2947 2948 #ifdef _LP64 2949 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 2950 match(Set dst (RoundDoubleMode src rmode)); 2951 format %{ "roundsd $dst,$src" %} 2952 ins_cost(150); 2953 ins_encode %{ 2954 assert(UseSSE >= 4, "required"); 2955 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 2956 %} 2957 ins_pipe(pipe_slow); 2958 %} 2959 2960 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 2961 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 2962 format %{ "roundsd $dst,$src" %} 2963 ins_cost(150); 2964 ins_encode %{ 2965 assert(UseSSE >= 4, "required"); 2966 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 2967 %} 2968 ins_pipe(pipe_slow); 2969 %} 2970 2971 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 2972 match(Set dst (RoundDoubleMode con rmode)); 2973 effect(TEMP scratch_reg); 2974 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 2975 ins_cost(150); 2976 ins_encode %{ 2977 assert(UseSSE >= 4, "required"); 2978 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 2979 %} 2980 ins_pipe(pipe_slow); 2981 %} 2982 2983 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 2984 predicate(n->as_Vector()->length() < 8); 2985 match(Set dst (RoundDoubleModeV src rmode)); 2986 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 2987 ins_encode %{ 2988 assert(UseAVX > 0, "required"); 2989 int vector_len = vector_length_encoding(this); 2990 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len); 2991 %} 2992 ins_pipe( pipe_slow ); 2993 %} 2994 2995 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 2996 predicate(n->as_Vector()->length() == 8); 2997 match(Set dst (RoundDoubleModeV src rmode)); 2998 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 2999 ins_encode %{ 3000 assert(UseAVX > 2, "required"); 3001 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3002 %} 3003 ins_pipe( pipe_slow ); 3004 %} 3005 3006 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3007 predicate(n->as_Vector()->length() < 8); 3008 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3009 format %{ "vroundpd $dst, $mem, $rmode\t! 
round packedD" %} 3010 ins_encode %{ 3011 assert(UseAVX > 0, "required"); 3012 int vector_len = vector_length_encoding(this); 3013 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len); 3014 %} 3015 ins_pipe( pipe_slow ); 3016 %} 3017 3018 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3019 predicate(n->as_Vector()->length() == 8); 3020 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3021 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3022 ins_encode %{ 3023 assert(UseAVX > 2, "required"); 3024 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3025 %} 3026 ins_pipe( pipe_slow ); 3027 %} 3028 #endif // _LP64 3029 3030 instruct onspinwait() %{ 3031 match(OnSpinWait); 3032 ins_cost(200); 3033 3034 format %{ 3035 $$template 3036 $$emit$$"pause\t! membar_onspinwait" 3037 %} 3038 ins_encode %{ 3039 __ pause(); 3040 %} 3041 ins_pipe(pipe_slow); 3042 %} 3043 3044 // a * b + c 3045 instruct fmaD_reg(regD a, regD b, regD c) %{ 3046 predicate(UseFMA); 3047 match(Set c (FmaD c (Binary a b))); 3048 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3049 ins_cost(150); 3050 ins_encode %{ 3051 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3052 %} 3053 ins_pipe( pipe_slow ); 3054 %} 3055 3056 // a * b + c 3057 instruct fmaF_reg(regF a, regF b, regF c) %{ 3058 predicate(UseFMA); 3059 match(Set c (FmaF c (Binary a b))); 3060 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3061 ins_cost(150); 3062 ins_encode %{ 3063 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3064 %} 3065 ins_pipe( pipe_slow ); 3066 %} 3067 3068 // ====================VECTOR INSTRUCTIONS===================================== 3069 3070 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3071 instruct MoveVec2Leg(legVec dst, vec src) %{ 3072 match(Set dst src); 3073 format %{ "" %} 3074 ins_encode %{ 3075 ShouldNotReachHere(); 3076 %} 3077 ins_pipe( fpu_reg_reg ); 3078 %} 3079 3080 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3081 match(Set dst src); 3082 format %{ "" %} 3083 ins_encode %{ 3084 ShouldNotReachHere(); 3085 %} 3086 ins_pipe( fpu_reg_reg ); 3087 %} 3088 3089 // ============================================================================ 3090 3091 // Load vectors 3092 instruct loadV(vec dst, memory mem) %{ 3093 match(Set dst (LoadVector mem)); 3094 ins_cost(125); 3095 format %{ "load_vector $dst,$mem" %} 3096 ins_encode %{ 3097 switch (vector_length_in_bytes(this)) { 3098 case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break; 3099 case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break; 3100 case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break; 3101 case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break; 3102 case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break; 3103 default: ShouldNotReachHere(); 3104 } 3105 %} 3106 ins_pipe( pipe_slow ); 3107 %} 3108 3109 // Store vectors generic operand pattern. 
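// A minimal sketch of the width dispatch that loadV above and storeV below
// share (illustrative C++ only; 'emit_vector_store' is a hypothetical helper,
// not something defined in this file):
//
//   static void emit_vector_store(MacroAssembler* masm, Address dst,
//                                 XMMRegister src, int bytes) {
//     switch (bytes) {
//       case  4: masm->movdl(dst, src);    break;  // one 32-bit lane
//       case  8: masm->movq(dst, src);     break;  // low 64 bits
//       case 16: masm->movdqu(dst, src);   break;  // full XMM
//       case 32: masm->vmovdqu(dst, src);  break;  // YMM, needs AVX
//       case 64: masm->evmovdqul(dst, src, Assembler::AVX_512bit); break; // ZMM
//       default: ShouldNotReachHere();
//     }
//   }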
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    switch (vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// ====================REPLICATE=======================================

// Replicate byte scalar to be vector
instruct ReplB_reg(vec dst, rRegI src) %{
  match(Set dst (ReplicateB src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
      assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
      int vlen_enc = vector_length_encoding(this);
      __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
        if (vlen >= 32) {
          assert(vlen == 32, "sanity");
          __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
        }
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_mem(vec dst, memory mem) %{
  predicate(VM_Version::supports_avx2());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_imm(vec dst, immI con) %{
  match(Set dst (ReplicateB con));
  format %{ "replicateB $dst,$con" %}
  ins_encode %{
    uint vlen = vector_length(this);
    InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1));
    if (vlen == 4) {
      __ movdl($dst$$XMMRegister, const_addr);
    } else {
      __ movq($dst$$XMMRegister, const_addr);
      if (vlen >= 16) {
        if (VM_Version::supports_avx2()) {
          int vlen_enc = vector_length_encoding(this);
          __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        } else {
          assert(vlen == 16, "sanity");
          __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
        }
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct ReplB_zero(vec dst, immI0 zero) %{
  match(Set dst (ReplicateB zero));
  format %{ "replicateB $dst,$zero" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 16) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ).
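      // XORing a register with itself is the canonical x86 zeroing idiom
      // (treated as dependency-breaking by the hardware), and the integer
      // form is available at 512 bits under plain AVX512F, whereas the FP
      // forms vxorps/vxorpd only gained 512-bit variants with AVX512DQ.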
3200 int vlen_enc = vector_length_encoding(this); 3201 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3202 } 3203 %} 3204 ins_pipe( fpu_reg_reg ); 3205 %} 3206 3207 // ====================ReplicateS======================================= 3208 3209 instruct ReplS_reg(vec dst, rRegI src) %{ 3210 match(Set dst (ReplicateS src)); 3211 format %{ "replicateS $dst,$src" %} 3212 ins_encode %{ 3213 uint vlen = vector_length(this); 3214 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3215 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 3216 int vlen_enc = vector_length_encoding(this); 3217 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 3218 } else { 3219 __ movdl($dst$$XMMRegister, $src$$Register); 3220 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3221 if (vlen >= 8) { 3222 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3223 if (vlen >= 16) { 3224 assert(vlen == 16, "sanity"); 3225 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3226 } 3227 } 3228 } 3229 %} 3230 ins_pipe( pipe_slow ); 3231 %} 3232 3233 instruct ReplS_mem(vec dst, memory mem) %{ 3234 predicate(VM_Version::supports_avx2()); 3235 match(Set dst (ReplicateS (LoadS mem))); 3236 format %{ "replicateS $dst,$mem" %} 3237 ins_encode %{ 3238 int vlen_enc = vector_length_encoding(this); 3239 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 3240 %} 3241 ins_pipe( pipe_slow ); 3242 %} 3243 3244 instruct ReplS_imm(vec dst, immI con) %{ 3245 match(Set dst (ReplicateS con)); 3246 format %{ "replicateS $dst,$con" %} 3247 ins_encode %{ 3248 uint vlen = vector_length(this); 3249 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2)); 3250 if (vlen == 2) { 3251 __ movdl($dst$$XMMRegister, const_addr); 3252 } else { 3253 __ movq($dst$$XMMRegister, const_addr); 3254 if (vlen >= 8) { 3255 if (VM_Version::supports_avx2()) { 3256 int vlen_enc = vector_length_encoding(this); 3257 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3258 } else { 3259 assert(vlen == 8, "sanity"); 3260 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3261 } 3262 } 3263 } 3264 %} 3265 ins_pipe( fpu_reg_reg ); 3266 %} 3267 3268 instruct ReplS_zero(vec dst, immI0 zero) %{ 3269 match(Set dst (ReplicateS zero)); 3270 format %{ "replicateS $dst,$zero" %} 3271 ins_encode %{ 3272 uint vlen = vector_length(this); 3273 if (vlen <= 8) { 3274 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3275 } else { 3276 int vlen_enc = vector_length_encoding(this); 3277 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3278 } 3279 %} 3280 ins_pipe( fpu_reg_reg ); 3281 %} 3282 3283 // ====================ReplicateI======================================= 3284 3285 instruct ReplI_reg(vec dst, rRegI src) %{ 3286 match(Set dst (ReplicateI src)); 3287 format %{ "replicateI $dst,$src" %} 3288 ins_encode %{ 3289 uint vlen = vector_length(this); 3290 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3291 int vlen_enc = vector_length_encoding(this); 3292 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 3293 } else { 3294 __ movdl($dst$$XMMRegister, $src$$Register); 3295 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3296 if (vlen >= 8) { 3297 assert(vlen == 8, "sanity"); 3298 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3299 } 3300 } 3301 %} 3302 ins_pipe( pipe_slow ); 3303 %} 3304 3305 instruct 
ReplI_mem(vec dst, memory mem) %{ 3306 match(Set dst (ReplicateI (LoadI mem))); 3307 format %{ "replicateI $dst,$mem" %} 3308 ins_encode %{ 3309 uint vlen = vector_length(this); 3310 if (vlen <= 4) { 3311 __ movdl($dst$$XMMRegister, $mem$$Address); 3312 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3313 } else { 3314 assert(VM_Version::supports_avx2(), "sanity"); 3315 int vector_len = vector_length_encoding(this); 3316 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 3317 } 3318 %} 3319 ins_pipe( pipe_slow ); 3320 %} 3321 3322 instruct ReplI_imm(vec dst, immI con) %{ 3323 match(Set dst (ReplicateI con)); 3324 format %{ "replicateI $dst,$con" %} 3325 ins_encode %{ 3326 uint vlen = vector_length(this); 3327 InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4)); 3328 if (vlen <= 4) { 3329 __ movq($dst$$XMMRegister, const_addr); 3330 if (vlen == 4) { 3331 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3332 } 3333 } else { 3334 assert(VM_Version::supports_avx2(), "sanity"); 3335 int vector_len = vector_length_encoding(this); 3336 __ movq($dst$$XMMRegister, const_addr); 3337 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3338 } 3339 %} 3340 ins_pipe( pipe_slow ); 3341 %} 3342 3343 // Replicate integer (4 byte) scalar zero to be vector 3344 instruct ReplI_zero(vec dst, immI0 zero) %{ 3345 match(Set dst (ReplicateI zero)); 3346 format %{ "replicateI $dst,$zero" %} 3347 ins_encode %{ 3348 uint vlen = vector_length(this); 3349 if (vlen <= 4) { 3350 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3351 } else { 3352 int vlen_enc = vector_length_encoding(this); 3353 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3354 } 3355 %} 3356 ins_pipe( fpu_reg_reg ); 3357 %} 3358 3359 // ====================ReplicateL======================================= 3360 3361 #ifdef _LP64 3362 // Replicate long (8 byte) scalar to be vector 3363 instruct ReplL_reg(vec dst, rRegL src) %{ 3364 match(Set dst (ReplicateL src)); 3365 format %{ "replicateL $dst,$src" %} 3366 ins_encode %{ 3367 uint vlen = vector_length(this); 3368 if (vlen == 2) { 3369 __ movdq($dst$$XMMRegister, $src$$Register); 3370 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3371 } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3372 int vlen_enc = vector_length_encoding(this); 3373 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 3374 } else { 3375 assert(vlen == 4, "sanity"); 3376 __ movdq($dst$$XMMRegister, $src$$Register); 3377 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3378 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3379 } 3380 %} 3381 ins_pipe( pipe_slow ); 3382 %} 3383 #else // _LP64 3384 // Replicate long (8 byte) scalar to be vector 3385 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 3386 predicate(n->as_Vector()->length() <= 4); 3387 match(Set dst (ReplicateL src)); 3388 effect(TEMP dst, USE src, TEMP tmp); 3389 format %{ "replicateL $dst,$src" %} 3390 ins_encode %{ 3391 uint vlen = vector_length(this); 3392 if (vlen == 2) { 3393 __ movdl($dst$$XMMRegister, $src$$Register); 3394 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3395 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3396 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3397 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 3398 int vector_len = Assembler::AVX_256bit; 3399 __ movdl($dst$$XMMRegister, $src$$Register); 3400 __ 
movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    if (VM_Version::supports_avx512vl()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
    } else {
      int vector_len = Assembler::AVX_512bit;
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct ReplL_mem(vec dst, memory mem) %{
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      assert(VM_Version::supports_avx2(), "sanity");
      int vlen_enc = vector_length_encoding(this);
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
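// A condensed view of the two shapes ReplL_imm below chooses between (an
// illustrative sketch, not an additional rule):
//
//   vlen == 2:  movq         xmm, [constant table]  // load 64-bit immediate
//               punpcklqdq   xmm, xmm               // duplicate into both lanes
//   vlen >  2:  movq         xmm, [constant table]  // AVX2 path
//               vpbroadcastq xmm, xmm               // broadcast to 4 or 8 lanes
//
// ReplB_imm, ReplS_imm and ReplI_imm above follow the same load-then-widen
// shape, with replicate8_imm first replicating the narrow immediate across
// eight bytes of the constant-table slot.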
3457 instruct ReplL_imm(vec dst, immL con) %{ 3458 match(Set dst (ReplicateL con)); 3459 format %{ "replicateL $dst,$con" %} 3460 ins_encode %{ 3461 uint vlen = vector_length(this); 3462 InternalAddress const_addr = $constantaddress($con); 3463 if (vlen == 2) { 3464 __ movq($dst$$XMMRegister, const_addr); 3465 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3466 } else { 3467 assert(VM_Version::supports_avx2(), "sanity"); 3468 int vlen_enc = vector_length_encoding(this); 3469 __ movq($dst$$XMMRegister, const_addr); 3470 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3471 } 3472 %} 3473 ins_pipe( pipe_slow ); 3474 %} 3475 3476 instruct ReplL_zero(vec dst, immL0 zero) %{ 3477 match(Set dst (ReplicateL zero)); 3478 format %{ "replicateL $dst,$zero" %} 3479 ins_encode %{ 3480 int vlen = vector_length(this); 3481 if (vlen == 2) { 3482 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3483 } else { 3484 int vlen_enc = vector_length_encoding(this); 3485 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3486 } 3487 %} 3488 ins_pipe( fpu_reg_reg ); 3489 %} 3490 3491 // ====================ReplicateF======================================= 3492 3493 instruct ReplF_reg(vec dst, vlRegF src) %{ 3494 match(Set dst (ReplicateF src)); 3495 format %{ "replicateF $dst,$src" %} 3496 ins_encode %{ 3497 uint vlen = vector_length(this); 3498 if (vlen <= 4) { 3499 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3500 } else if (VM_Version::supports_avx2()) { 3501 int vector_len = vector_length_encoding(this); 3502 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2 3503 } else { 3504 assert(vlen == 8, "sanity"); 3505 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3506 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3507 } 3508 %} 3509 ins_pipe( pipe_slow ); 3510 %} 3511 3512 instruct ReplF_mem(vec dst, memory mem) %{ 3513 match(Set dst (ReplicateF (LoadF mem))); 3514 format %{ "replicateF $dst,$mem" %} 3515 ins_encode %{ 3516 uint vlen = vector_length(this); 3517 if (vlen <= 4) { 3518 __ movdl($dst$$XMMRegister, $mem$$Address); 3519 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3520 } else { 3521 assert(VM_Version::supports_avx(), "sanity"); 3522 int vector_len = vector_length_encoding(this); 3523 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 3524 } 3525 %} 3526 ins_pipe( pipe_slow ); 3527 %} 3528 3529 instruct ReplF_zero(vec dst, immF0 zero) %{ 3530 match(Set dst (ReplicateF zero)); 3531 format %{ "replicateF $dst,$zero" %} 3532 ins_encode %{ 3533 uint vlen = vector_length(this); 3534 if (vlen <= 4) { 3535 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3536 } else { 3537 int vlen_enc = vector_length_encoding(this); 3538 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 3539 } 3540 %} 3541 ins_pipe( fpu_reg_reg ); 3542 %} 3543 3544 // ====================ReplicateD======================================= 3545 3546 // Replicate double (8 bytes) scalar to be vector 3547 instruct ReplD_reg(vec dst, vlRegD src) %{ 3548 match(Set dst (ReplicateD src)); 3549 format %{ "replicateD $dst,$src" %} 3550 ins_encode %{ 3551 uint vlen = vector_length(this); 3552 if (vlen == 2) { 3553 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3554 } else if (VM_Version::supports_avx2()) { 3555 int vector_len = vector_length_encoding(this); 3556 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg 
variant requires AVX2 3557 } else { 3558 assert(vlen == 4, "sanity"); 3559 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3560 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3561 } 3562 %} 3563 ins_pipe( pipe_slow ); 3564 %} 3565 3566 instruct ReplD_mem(vec dst, memory mem) %{ 3567 match(Set dst (ReplicateD (LoadD mem))); 3568 format %{ "replicateD $dst,$mem" %} 3569 ins_encode %{ 3570 uint vlen = vector_length(this); 3571 if (vlen == 2) { 3572 __ movq($dst$$XMMRegister, $mem$$Address); 3573 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); 3574 } else { 3575 assert(VM_Version::supports_avx(), "sanity"); 3576 int vector_len = vector_length_encoding(this); 3577 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 3578 } 3579 %} 3580 ins_pipe( pipe_slow ); 3581 %} 3582 3583 instruct ReplD_zero(vec dst, immD0 zero) %{ 3584 match(Set dst (ReplicateD zero)); 3585 format %{ "replicateD $dst,$zero" %} 3586 ins_encode %{ 3587 uint vlen = vector_length(this); 3588 if (vlen == 2) { 3589 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3590 } else { 3591 int vlen_enc = vector_length_encoding(this); 3592 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 3593 } 3594 %} 3595 ins_pipe( fpu_reg_reg ); 3596 %} 3597 3598 // ====================REDUCTION ARITHMETIC======================================= 3599 // =======================Int Reduction========================================== 3600 3601 instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 3602 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT && 3603 n->in(2)->bottom_type()->is_vect()->length() < 16); 3604 match(Set dst (AddReductionVI src1 src2)); 3605 match(Set dst (MulReductionVI src1 src2)); 3606 match(Set dst (AndReductionV src1 src2)); 3607 match(Set dst ( OrReductionV src1 src2)); 3608 match(Set dst (XorReductionV src1 src2)); 3609 effect(TEMP vtmp1, TEMP vtmp2); 3610 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3611 ins_encode %{ 3612 int opcode = this->ideal_Opcode(); 3613 int vlen = vector_length(this, $src2); 3614 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3615 %} 3616 ins_pipe( pipe_slow ); 3617 %} 3618 3619 instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 3620 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT && 3621 n->in(2)->bottom_type()->is_vect()->length() == 16); 3622 match(Set dst (AddReductionVI src1 src2)); 3623 match(Set dst (MulReductionVI src1 src2)); 3624 match(Set dst (AndReductionV src1 src2)); 3625 match(Set dst ( OrReductionV src1 src2)); 3626 match(Set dst (XorReductionV src1 src2)); 3627 effect(TEMP vtmp1, TEMP vtmp2); 3628 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3629 ins_encode %{ 3630 int opcode = this->ideal_Opcode(); 3631 int vlen = vector_length(this, $src2); 3632 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3633 %} 3634 ins_pipe( pipe_slow ); 3635 %} 3636 3637 // =======================Long Reduction========================================== 3638 3639 #ifdef _LP64 3640 instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 3641 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && 3642 
n->in(2)->bottom_type()->is_vect()->length() < 8); 3643 match(Set dst (AddReductionVL src1 src2)); 3644 match(Set dst (MulReductionVL src1 src2)); 3645 match(Set dst (AndReductionV src1 src2)); 3646 match(Set dst ( OrReductionV src1 src2)); 3647 match(Set dst (XorReductionV src1 src2)); 3648 effect(TEMP vtmp1, TEMP vtmp2); 3649 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3650 ins_encode %{ 3651 int opcode = this->ideal_Opcode(); 3652 int vlen = vector_length(this, $src2); 3653 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3654 %} 3655 ins_pipe( pipe_slow ); 3656 %} 3657 3658 instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 3659 predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && 3660 n->in(2)->bottom_type()->is_vect()->length() == 8); 3661 match(Set dst (AddReductionVL src1 src2)); 3662 match(Set dst (MulReductionVL src1 src2)); 3663 match(Set dst (AndReductionV src1 src2)); 3664 match(Set dst ( OrReductionV src1 src2)); 3665 match(Set dst (XorReductionV src1 src2)); 3666 effect(TEMP vtmp1, TEMP vtmp2); 3667 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 3668 ins_encode %{ 3669 int opcode = this->ideal_Opcode(); 3670 int vlen = vector_length(this, $src2); 3671 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3672 %} 3673 ins_pipe( pipe_slow ); 3674 %} 3675 #endif // _LP64 3676 3677 // =======================Float Reduction========================================== 3678 3679 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 3680 predicate(n->in(2)->bottom_type()->is_vect()->length() <= 4); 3681 match(Set dst (AddReductionVF dst src)); 3682 match(Set dst (MulReductionVF dst src)); 3683 effect(TEMP dst, TEMP vtmp); 3684 format %{ "vector_reduction_fp $dst,$src ; using $vtmp as TEMP" %} 3685 ins_encode %{ 3686 int opcode = this->ideal_Opcode(); 3687 int vlen = vector_length(this, $src); 3688 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 3689 %} 3690 ins_pipe( pipe_slow ); 3691 %} 3692 3693 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 3694 predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); 3695 match(Set dst (AddReductionVF dst src)); 3696 match(Set dst (MulReductionVF dst src)); 3697 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3698 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3699 ins_encode %{ 3700 int opcode = this->ideal_Opcode(); 3701 int vlen = vector_length(this, $src); 3702 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3703 %} 3704 ins_pipe( pipe_slow ); 3705 %} 3706 3707 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 3708 predicate(n->in(2)->bottom_type()->is_vect()->length() == 16); 3709 match(Set dst (AddReductionVF dst src)); 3710 match(Set dst (MulReductionVF dst src)); 3711 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3712 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3713 ins_encode %{ 3714 int opcode = this->ideal_Opcode(); 3715 int vlen = vector_length(this, $src); 3716 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3717 %} 3718 ins_pipe( pipe_slow ); 3719 %} 3720 3721 // =======================Double 
Reduction========================================== 3722 3723 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 3724 predicate(n->in(2)->bottom_type()->is_vect()->length() == 2); 3725 match(Set dst (AddReductionVD dst src)); 3726 match(Set dst (MulReductionVD dst src)); 3727 effect(TEMP dst, TEMP vtmp); 3728 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 3729 ins_encode %{ 3730 int opcode = this->ideal_Opcode(); 3731 int vlen = vector_length(this, $src); 3732 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 3733 %} 3734 ins_pipe( pipe_slow ); 3735 %} 3736 3737 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 3738 predicate(n->in(2)->bottom_type()->is_vect()->length() == 4); 3739 match(Set dst (AddReductionVD dst src)); 3740 match(Set dst (MulReductionVD dst src)); 3741 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3742 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3743 ins_encode %{ 3744 int opcode = this->ideal_Opcode(); 3745 int vlen = vector_length(this, $src); 3746 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3747 %} 3748 ins_pipe( pipe_slow ); 3749 %} 3750 3751 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 3752 predicate(n->in(2)->bottom_type()->is_vect()->length() == 8); 3753 match(Set dst (AddReductionVD dst src)); 3754 match(Set dst (MulReductionVD dst src)); 3755 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 3756 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 3757 ins_encode %{ 3758 int opcode = this->ideal_Opcode(); 3759 int vlen = vector_length(this, $src); 3760 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 3765 // ====================VECTOR ARITHMETIC======================================= 3766 3767 // --------------------------------- ADD -------------------------------------- 3768 3769 // Bytes vector add 3770 instruct vaddB(vec dst, vec src) %{ 3771 predicate(UseAVX == 0); 3772 match(Set dst (AddVB dst src)); 3773 format %{ "paddb $dst,$src\t! add packedB" %} 3774 ins_encode %{ 3775 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3776 %} 3777 ins_pipe( pipe_slow ); 3778 %} 3779 3780 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 3781 predicate(UseAVX > 0); 3782 match(Set dst (AddVB src1 src2)); 3783 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 3784 ins_encode %{ 3785 int vector_len = vector_length_encoding(this); 3786 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3787 %} 3788 ins_pipe( pipe_slow ); 3789 %} 3790 3791 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 3792 predicate(UseAVX > 0); 3793 match(Set dst (AddVB src (LoadVector mem))); 3794 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 3795 ins_encode %{ 3796 int vector_len = vector_length_encoding(this); 3797 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3798 %} 3799 ins_pipe( pipe_slow ); 3800 %} 3801 3802 // Shorts/Chars vector add 3803 instruct vaddS(vec dst, vec src) %{ 3804 predicate(UseAVX == 0); 3805 match(Set dst (AddVS dst src)); 3806 format %{ "paddw $dst,$src\t! 
add packedS" %} 3807 ins_encode %{ 3808 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 3809 %} 3810 ins_pipe( pipe_slow ); 3811 %} 3812 3813 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 3814 predicate(UseAVX > 0); 3815 match(Set dst (AddVS src1 src2)); 3816 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 3817 ins_encode %{ 3818 int vector_len = vector_length_encoding(this); 3819 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3820 %} 3821 ins_pipe( pipe_slow ); 3822 %} 3823 3824 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 3825 predicate(UseAVX > 0); 3826 match(Set dst (AddVS src (LoadVector mem))); 3827 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 3828 ins_encode %{ 3829 int vector_len = vector_length_encoding(this); 3830 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3831 %} 3832 ins_pipe( pipe_slow ); 3833 %} 3834 3835 // Integers vector add 3836 instruct vaddI(vec dst, vec src) %{ 3837 predicate(UseAVX == 0); 3838 match(Set dst (AddVI dst src)); 3839 format %{ "paddd $dst,$src\t! add packedI" %} 3840 ins_encode %{ 3841 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 3842 %} 3843 ins_pipe( pipe_slow ); 3844 %} 3845 3846 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 3847 predicate(UseAVX > 0); 3848 match(Set dst (AddVI src1 src2)); 3849 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 3850 ins_encode %{ 3851 int vector_len = vector_length_encoding(this); 3852 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3853 %} 3854 ins_pipe( pipe_slow ); 3855 %} 3856 3857 3858 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 3859 predicate(UseAVX > 0); 3860 match(Set dst (AddVI src (LoadVector mem))); 3861 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 3862 ins_encode %{ 3863 int vector_len = vector_length_encoding(this); 3864 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3865 %} 3866 ins_pipe( pipe_slow ); 3867 %} 3868 3869 // Longs vector add 3870 instruct vaddL(vec dst, vec src) %{ 3871 predicate(UseAVX == 0); 3872 match(Set dst (AddVL dst src)); 3873 format %{ "paddq $dst,$src\t! add packedL" %} 3874 ins_encode %{ 3875 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 3876 %} 3877 ins_pipe( pipe_slow ); 3878 %} 3879 3880 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 3881 predicate(UseAVX > 0); 3882 match(Set dst (AddVL src1 src2)); 3883 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 3884 ins_encode %{ 3885 int vector_len = vector_length_encoding(this); 3886 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3887 %} 3888 ins_pipe( pipe_slow ); 3889 %} 3890 3891 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 3892 predicate(UseAVX > 0); 3893 match(Set dst (AddVL src (LoadVector mem))); 3894 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 3895 ins_encode %{ 3896 int vector_len = vector_length_encoding(this); 3897 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 // Floats vector add 3903 instruct vaddF(vec dst, vec src) %{ 3904 predicate(UseAVX == 0); 3905 match(Set dst (AddVF dst src)); 3906 format %{ "addps $dst,$src\t! 
add packedF" %} 3907 ins_encode %{ 3908 __ addps($dst$$XMMRegister, $src$$XMMRegister); 3909 %} 3910 ins_pipe( pipe_slow ); 3911 %} 3912 3913 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 3914 predicate(UseAVX > 0); 3915 match(Set dst (AddVF src1 src2)); 3916 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 3917 ins_encode %{ 3918 int vector_len = vector_length_encoding(this); 3919 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3920 %} 3921 ins_pipe( pipe_slow ); 3922 %} 3923 3924 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 3925 predicate(UseAVX > 0); 3926 match(Set dst (AddVF src (LoadVector mem))); 3927 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 3928 ins_encode %{ 3929 int vector_len = vector_length_encoding(this); 3930 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3931 %} 3932 ins_pipe( pipe_slow ); 3933 %} 3934 3935 // Doubles vector add 3936 instruct vaddD(vec dst, vec src) %{ 3937 predicate(UseAVX == 0); 3938 match(Set dst (AddVD dst src)); 3939 format %{ "addpd $dst,$src\t! add packedD" %} 3940 ins_encode %{ 3941 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 3942 %} 3943 ins_pipe( pipe_slow ); 3944 %} 3945 3946 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 3947 predicate(UseAVX > 0); 3948 match(Set dst (AddVD src1 src2)); 3949 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 3950 ins_encode %{ 3951 int vector_len = vector_length_encoding(this); 3952 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3953 %} 3954 ins_pipe( pipe_slow ); 3955 %} 3956 3957 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 3958 predicate(UseAVX > 0); 3959 match(Set dst (AddVD src (LoadVector mem))); 3960 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 3961 ins_encode %{ 3962 int vector_len = vector_length_encoding(this); 3963 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3964 %} 3965 ins_pipe( pipe_slow ); 3966 %} 3967 3968 // --------------------------------- SUB -------------------------------------- 3969 3970 // Bytes vector sub 3971 instruct vsubB(vec dst, vec src) %{ 3972 predicate(UseAVX == 0); 3973 match(Set dst (SubVB dst src)); 3974 format %{ "psubb $dst,$src\t! sub packedB" %} 3975 ins_encode %{ 3976 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 3977 %} 3978 ins_pipe( pipe_slow ); 3979 %} 3980 3981 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 3982 predicate(UseAVX > 0); 3983 match(Set dst (SubVB src1 src2)); 3984 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 3985 ins_encode %{ 3986 int vector_len = vector_length_encoding(this); 3987 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 3988 %} 3989 ins_pipe( pipe_slow ); 3990 %} 3991 3992 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 3993 predicate(UseAVX > 0); 3994 match(Set dst (SubVB src (LoadVector mem))); 3995 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 3996 ins_encode %{ 3997 int vector_len = vector_length_encoding(this); 3998 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 3999 %} 4000 ins_pipe( pipe_slow ); 4001 %} 4002 4003 // Shorts/Chars vector sub 4004 instruct vsubS(vec dst, vec src) %{ 4005 predicate(UseAVX == 0); 4006 match(Set dst (SubVS dst src)); 4007 format %{ "psubw $dst,$src\t! 
sub packedS" %} 4008 ins_encode %{ 4009 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 4010 %} 4011 ins_pipe( pipe_slow ); 4012 %} 4013 4014 4015 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 4016 predicate(UseAVX > 0); 4017 match(Set dst (SubVS src1 src2)); 4018 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 4019 ins_encode %{ 4020 int vector_len = vector_length_encoding(this); 4021 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4022 %} 4023 ins_pipe( pipe_slow ); 4024 %} 4025 4026 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 4027 predicate(UseAVX > 0); 4028 match(Set dst (SubVS src (LoadVector mem))); 4029 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 4030 ins_encode %{ 4031 int vector_len = vector_length_encoding(this); 4032 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4033 %} 4034 ins_pipe( pipe_slow ); 4035 %} 4036 4037 // Integers vector sub 4038 instruct vsubI(vec dst, vec src) %{ 4039 predicate(UseAVX == 0); 4040 match(Set dst (SubVI dst src)); 4041 format %{ "psubd $dst,$src\t! sub packedI" %} 4042 ins_encode %{ 4043 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 4044 %} 4045 ins_pipe( pipe_slow ); 4046 %} 4047 4048 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 4049 predicate(UseAVX > 0); 4050 match(Set dst (SubVI src1 src2)); 4051 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 4052 ins_encode %{ 4053 int vector_len = vector_length_encoding(this); 4054 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4055 %} 4056 ins_pipe( pipe_slow ); 4057 %} 4058 4059 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 4060 predicate(UseAVX > 0); 4061 match(Set dst (SubVI src (LoadVector mem))); 4062 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 4063 ins_encode %{ 4064 int vector_len = vector_length_encoding(this); 4065 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4066 %} 4067 ins_pipe( pipe_slow ); 4068 %} 4069 4070 // Longs vector sub 4071 instruct vsubL(vec dst, vec src) %{ 4072 predicate(UseAVX == 0); 4073 match(Set dst (SubVL dst src)); 4074 format %{ "psubq $dst,$src\t! sub packedL" %} 4075 ins_encode %{ 4076 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 4077 %} 4078 ins_pipe( pipe_slow ); 4079 %} 4080 4081 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 4082 predicate(UseAVX > 0); 4083 match(Set dst (SubVL src1 src2)); 4084 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 4085 ins_encode %{ 4086 int vector_len = vector_length_encoding(this); 4087 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4088 %} 4089 ins_pipe( pipe_slow ); 4090 %} 4091 4092 4093 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 4094 predicate(UseAVX > 0); 4095 match(Set dst (SubVL src (LoadVector mem))); 4096 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 4097 ins_encode %{ 4098 int vector_len = vector_length_encoding(this); 4099 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4100 %} 4101 ins_pipe( pipe_slow ); 4102 %} 4103 4104 // Floats vector sub 4105 instruct vsubF(vec dst, vec src) %{ 4106 predicate(UseAVX == 0); 4107 match(Set dst (SubVF dst src)); 4108 format %{ "subps $dst,$src\t! 
sub packedF" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t!
sub packedD" %} 4163 ins_encode %{ 4164 int vector_len = vector_length_encoding(this); 4165 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4166 %} 4167 ins_pipe( pipe_slow ); 4168 %} 4169 4170 // --------------------------------- MUL -------------------------------------- 4171 4172 // Byte vector mul 4173 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 4174 predicate(n->as_Vector()->length() == 4 || 4175 n->as_Vector()->length() == 8); 4176 match(Set dst (MulVB src1 src2)); 4177 effect(TEMP dst, TEMP tmp, TEMP scratch); 4178 format %{"vector_mulB $dst,$src1,$src2" %} 4179 ins_encode %{ 4180 assert(UseSSE > 3, "required"); 4181 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 4182 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 4183 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 4184 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4185 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 4186 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 4187 %} 4188 ins_pipe( pipe_slow ); 4189 %} 4190 4191 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4192 predicate(n->as_Vector()->length() == 16 && UseAVX <= 1); 4193 match(Set dst (MulVB src1 src2)); 4194 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4195 format %{"vector_mulB $dst,$src1,$src2" %} 4196 ins_encode %{ 4197 assert(UseSSE > 3, "required"); 4198 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 4199 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 4200 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 4201 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 4202 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 4203 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4204 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 4205 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 4206 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4207 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 4208 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 4209 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 4210 %} 4211 ins_pipe( pipe_slow ); 4212 %} 4213 4214 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 4215 predicate(n->as_Vector()->length() == 16 && UseAVX > 1); 4216 match(Set dst (MulVB src1 src2)); 4217 effect(TEMP dst, TEMP tmp, TEMP scratch); 4218 format %{"vector_mulB $dst,$src1,$src2" %} 4219 ins_encode %{ 4220 int vector_len = Assembler::AVX_256bit; 4221 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 4222 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4223 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); 4224 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4225 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 4226 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 4227 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 4228 %} 4229 ins_pipe( pipe_slow ); 4230 %} 4231 4232 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4233 predicate(n->as_Vector()->length() == 32); 4234 match(Set dst (MulVB src1 src2)); 4235 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4236 format %{"vector_mulB $dst,$src1,$src2" %} 4237 ins_encode %{ 4238 assert(UseAVX > 1, "required"); 4239 int 
vector_len = Assembler::AVX_256bit; 4240 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 4241 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 4242 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4243 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4244 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4245 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 4246 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4247 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4248 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4249 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4250 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4251 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4252 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4253 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 4254 %} 4255 ins_pipe( pipe_slow ); 4256 %} 4257 4258 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 4259 predicate(n->as_Vector()->length() == 64); 4260 match(Set dst (MulVB src1 src2)); 4261 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4262 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 4263 ins_encode %{ 4264 assert(UseAVX > 2, "required"); 4265 int vector_len = Assembler::AVX_512bit; 4266 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 4267 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 4268 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4269 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4270 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4271 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 4272 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 4273 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4274 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4275 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4276 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4277 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4278 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4279 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 4280 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4281 %} 4282 ins_pipe( pipe_slow ); 4283 %} 4284 4285 // Shorts/Chars vector mul 4286 instruct vmulS(vec dst, vec src) %{ 4287 predicate(UseAVX == 0); 4288 match(Set dst (MulVS dst src)); 4289 format %{ "pmullw $dst,$src\t! mul packedS" %} 4290 ins_encode %{ 4291 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4292 %} 4293 ins_pipe( pipe_slow ); 4294 %} 4295 4296 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 4297 predicate(UseAVX > 0); 4298 match(Set dst (MulVS src1 src2)); 4299 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packedS" %} 4300 ins_encode %{ 4301 int vector_len = vector_length_encoding(this); 4302 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4303 %} 4304 ins_pipe( pipe_slow ); 4305 %} 4306 4307 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 4308 predicate(UseAVX > 0); 4309 match(Set dst (MulVS src (LoadVector mem))); 4310 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 4311 ins_encode %{ 4312 int vector_len = vector_length_encoding(this); 4313 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4314 %} 4315 ins_pipe( pipe_slow ); 4316 %} 4317 4318 // Integers vector mul 4319 instruct vmulI(vec dst, vec src) %{ 4320 predicate(UseAVX == 0); 4321 match(Set dst (MulVI dst src)); 4322 format %{ "pmulld $dst,$src\t! mul packedI" %} 4323 ins_encode %{ 4324 assert(UseSSE > 3, "required"); 4325 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 4326 %} 4327 ins_pipe( pipe_slow ); 4328 %} 4329 4330 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 4331 predicate(UseAVX > 0); 4332 match(Set dst (MulVI src1 src2)); 4333 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 4334 ins_encode %{ 4335 int vector_len = vector_length_encoding(this); 4336 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4337 %} 4338 ins_pipe( pipe_slow ); 4339 %} 4340 4341 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 4342 predicate(UseAVX > 0); 4343 match(Set dst (MulVI src (LoadVector mem))); 4344 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 4345 ins_encode %{ 4346 int vector_len = vector_length_encoding(this); 4347 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4348 %} 4349 ins_pipe( pipe_slow ); 4350 %} 4351 4352 // Longs vector mul 4353 instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 4354 match(Set dst (MulVL src1 src2)); 4355 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 4356 ins_encode %{ 4357 assert(UseAVX > 2, "required"); 4358 int vector_len = vector_length_encoding(this); 4359 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4360 %} 4361 ins_pipe( pipe_slow ); 4362 %} 4363 4364 instruct vmulL_mem(vec dst, vec src, memory mem) %{ 4365 match(Set dst (MulVL src (LoadVector mem))); 4366 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 4367 ins_encode %{ 4368 assert(UseAVX > 2, "required"); 4369 int vector_len = vector_length_encoding(this); 4370 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4371 %} 4372 ins_pipe( pipe_slow ); 4373 %} 4374 4375 // Floats vector mul 4376 instruct vmulF(vec dst, vec src) %{ 4377 predicate(UseAVX == 0); 4378 match(Set dst (MulVF dst src)); 4379 format %{ "mulps $dst,$src\t! mul packedF" %} 4380 ins_encode %{ 4381 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 4382 %} 4383 ins_pipe( pipe_slow ); 4384 %} 4385 4386 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 4387 predicate(UseAVX > 0); 4388 match(Set dst (MulVF src1 src2)); 4389 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 4390 ins_encode %{ 4391 int vector_len = vector_length_encoding(this); 4392 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4393 %} 4394 ins_pipe( pipe_slow ); 4395 %} 4396 4397 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 4398 predicate(UseAVX > 0); 4399 match(Set dst (MulVF src (LoadVector mem))); 4400 format %{ "vmulps $dst,$src,$mem\t! 
mul packedF" %} 4401 ins_encode %{ 4402 int vector_len = vector_length_encoding(this); 4403 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4404 %} 4405 ins_pipe( pipe_slow ); 4406 %} 4407 4408 // Doubles vector mul 4409 instruct vmulD(vec dst, vec src) %{ 4410 predicate(UseAVX == 0); 4411 match(Set dst (MulVD dst src)); 4412 format %{ "mulpd $dst,$src\t! mul packedD" %} 4413 ins_encode %{ 4414 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 4415 %} 4416 ins_pipe( pipe_slow ); 4417 %} 4418 4419 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 4420 predicate(UseAVX > 0); 4421 match(Set dst (MulVD src1 src2)); 4422 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 4423 ins_encode %{ 4424 int vector_len = vector_length_encoding(this); 4425 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4426 %} 4427 ins_pipe( pipe_slow ); 4428 %} 4429 4430 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 4431 predicate(UseAVX > 0); 4432 match(Set dst (MulVD src (LoadVector mem))); 4433 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 4434 ins_encode %{ 4435 int vector_len = vector_length_encoding(this); 4436 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 4437 %} 4438 ins_pipe( pipe_slow ); 4439 %} 4440 4441 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 4442 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4443 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 4444 effect(TEMP dst, USE src1, USE src2); 4445 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 4446 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 4447 %} 4448 ins_encode %{ 4449 int vector_len = 1; 4450 int cond = (Assembler::Condition)($copnd$$cmpcode); 4451 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 4452 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 4453 %} 4454 ins_pipe( pipe_slow ); 4455 %} 4456 4457 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 4458 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4459 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 4460 effect(TEMP dst, USE src1, USE src2); 4461 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 4462 "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 4463 %} 4464 ins_encode %{ 4465 int vector_len = 1; 4466 int cond = (Assembler::Condition)($copnd$$cmpcode); 4467 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 4468 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 4469 %} 4470 ins_pipe( pipe_slow ); 4471 %} 4472 4473 // --------------------------------- DIV -------------------------------------- 4474 4475 // Floats vector div 4476 instruct vdivF(vec dst, vec src) %{ 4477 predicate(UseAVX == 0); 4478 match(Set dst (DivVF dst src)); 4479 format %{ "divps $dst,$src\t! div packedF" %} 4480 ins_encode %{ 4481 __ divps($dst$$XMMRegister, $src$$XMMRegister); 4482 %} 4483 ins_pipe( pipe_slow ); 4484 %} 4485 4486 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 4487 predicate(UseAVX > 0); 4488 match(Set dst (DivVF src1 src2)); 4489 format %{ "vdivps $dst,$src1,$src2\t! 
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

// Floats vector sqrt
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtD_mem(vec dst, memory mem) %{
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm reg are used for the count).
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
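// Note: the variable vector shifts used below (psllw/pslld/psllq and their
// right-shift counterparts) take a single scalar count from the low 64 bits
// of an XMM register and apply it to every lane, which is why one movdl of
// the GP count serves both LShiftCntV and RShiftCntV. Counts of at least the
// element width zero the lane for logical shifts and saturate to the sign
// for arithmetic ones. Per-lane sketch (illustrative only):
//
//   for (int i = 0; i < lanes; i++) dst[i] = shift_op(src[i], cnt);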
// Byte vector shift
//
// x86 has no packed 8-bit shift, so bytes are widened to words (vextendbw,
// sign- or zero-extending according to the ideal opcode), shifted as words,
// masked back to the byte range with vector_short_to_byte_mask (0x00ff per
// word), and re-packed with packuswb.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() <= 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();

    __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_256bit;
    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
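// Note on vshift32B_avx above: 256-bit vpackuswb packs within each 128-bit
// lane, so the four packed quadwords come out interleaved as [a0, b0, a1, b1]
// rather than sequential. The trailing vpermq with selector 0xD8 (qword
// indices 0,2,1,3) restores [a0, a1, b0, b1]. Sketch of the selector
// (illustrative only):
//
//   // imm8 = 0xD8 = 0b11'01'10'00  ->  dst qwords take src qwords 0,2,1,3
//   dst[0] = src[0]; dst[1] = src[2]; dst[2] = src[1]; dst[3] = src[3];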
src shift)); 4670 effect(TEMP dst, TEMP tmp, TEMP scratch); 4671 format %{"vector_byte_shift $dst,$src,$shift" %} 4672 ins_encode %{ 4673 assert(UseAVX > 1, "required"); 4674 int opcode = this->ideal_Opcode(); 4675 int vector_len = Assembler::AVX_256bit; 4676 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 4677 __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4678 __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len); 4679 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 4680 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len); 4681 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 4682 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 4683 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 4684 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 4685 %} 4686 ins_pipe( pipe_slow ); 4687 %} 4688 4689 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 4690 predicate(n->as_Vector()->length() == 64); 4691 match(Set dst (LShiftVB src shift)); 4692 match(Set dst (RShiftVB src shift)); 4693 match(Set dst (URShiftVB src shift)); 4694 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 4695 format %{"vector_byte_shift $dst,$src,$shift" %} 4696 ins_encode %{ 4697 assert(UseAVX > 2, "required"); 4698 int opcode = this->ideal_Opcode(); 4699 int vector_len = Assembler::AVX_512bit; 4700 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 4701 __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 4702 __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len); 4703 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len); 4704 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); 4705 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 4706 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4707 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 4708 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4709 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4710 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 4711 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 4712 %} 4713 ins_pipe( pipe_slow ); 4714 %} 4715 4716 // Shorts vector logical right shift produces incorrect Java result 4717 // for negative data because java code convert short value into int with 4718 // sign extension before a shift. But char vectors are fine since chars are 4719 // unsigned values. 4720 // Shorts/Chars vector left shift 4721 instruct vshiftS(vec dst, vec src, vec shift) %{ 4722 match(Set dst (LShiftVS src shift)); 4723 match(Set dst (RShiftVS src shift)); 4724 match(Set dst (URShiftVS src shift)); 4725 effect(TEMP dst, USE src, USE shift); 4726 format %{ "vshiftw $dst,$src,$shift\t! 
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    } else {
      int vlen = vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
  match(Set dst (LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    } else {
      assert(vector_length(this) == 2, "required");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
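// Note: before AVX-512 (evpsraq) x86 has no packed arithmetic right shift for
// 64-bit lanes, so RShiftVL is deliberately absent from vshiftL above and is
// handled by the dedicated rules in the next section. The pre-AVX-512 rule
// there emulates it, per lane with count s, via the identity
//
//   sra(x, s) == (srl(x, s) ^ m) - m,   where m = 0x8000000000000000 >>> s
//
// i.e. shift logically, then use the shifted sign mask to re-propagate the
// sign bit (the psrlq + pxor + psubq sequence below).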
// ------------------- ArithmeticRightShift -----------------------------------

// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vector_len = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = vector_length_encoding(this);
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
  predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src, rRegI scratch) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
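// Note on the ABSNEG rules above: vabsnegf/vabsnegd reduce to one bitwise
// operation against a constant mask (addressed via $scratch): AbsV* ands the
// sign bit away, NegV* xors it. A sketch of the per-lane effect for floats,
// with doubles analogous (illustrative only; the mask constants live in a
// stub table elsewhere):
//
//   abs(x): bits(x) & 0x7FFFFFFF        neg(x): bits(x) ^ 0x80000000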
// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
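// Note: pmaddwd/vpmaddwd multiply adjacent pairs of signed 16-bit elements
// and add each pair into one 32-bit element, which is exactly MulAddVS2VI.
// On AVX512_VNNI hardware, evpdpwssd additionally folds in the accumulate,
// so the rule above matches MulAddVS2VI + AddVI with a single instruction;
// its low ins_cost(10) makes the matcher prefer it over the split
// vpmaddwd + vpaddd sequence. Per 32-bit lane (illustrative only):
//
//   dst[i] += (int)src1[2*i]   * (int)src2[2*i]
//           + (int)src1[2*i+1] * (int)src2[2*i+1];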
// --------------------------------- PopCount --------------------------------------

instruct vpopcountI(vec dst, vec src) %{
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %}
  ins_encode %{
    assert(UsePopCountInstruction, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
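// Note: vpopcntd sets each 32-bit lane of $dst to the number of set bits in
// the corresponding lane of $src, i.e. Integer.bitCount per element. Scalar
// sketch of one lane (illustrative only):
//
//   int bit_count(uint32_t x) {
//     int n = 0;
//     while (x != 0) { x &= x - 1; n++; }  // clears the lowest set bit
//     return n;
//   }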