1 // 2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX). 
// Calling-convention notes for the XMM registers defined below:
//   Linux ABI:   no XMM register is preserved across function calls;
//                XMM0-XMM7 might hold parameters.
//   Windows ABI: XMM6-XMM15 are preserved across function calls;
//                XMM0-XMM3 might hold parameters.

// ---- XMM0-XMM5: save-on-call under both conventions -------------------------

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));

#ifdef _WIN64

// ---- Win64: XMM6-XMM15 carry SOE as their C-convention save type ------------

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#else // _WIN64

// ---- Non-Win64: XMM6 and up are save-on-call under both conventions ---------

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

#ifdef _LP64

// XMM8-XMM15 are only defined for 64-bit builds.

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64

// Condition-code register: encoding 16 on 64-bit builds, 8 otherwise, and
// no backing VMReg.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation chunk holding every XMM slot (words a-h of each register).
alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers (word a of each XMM register)
reg_class float_reg(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7
#ifdef _LP64
                   ,XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15
#endif
                    );

// Class for all double registers (words a-b of each XMM register)
reg_class double_reg(XMM0,  XMM0b,  XMM1,  XMM1b,  XMM2,  XMM2b,  XMM3,  XMM3b,
                     XMM4,  XMM4b,  XMM5,  XMM5b,  XMM6,  XMM6b,  XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,  XMM9,  XMM9b,  XMM10, XMM10b, XMM11, XMM11b,
                     XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7
#ifdef _LP64
                     ,XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,  XMM1,  XMM1b,  XMM2,  XMM2b,  XMM3,  XMM3b,
                      XMM4,  XMM4b,  XMM5,  XMM5b,  XMM6,  XMM6b,  XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM9,  XMM9b,  XMM10, XMM10b, XMM11, XMM11b,
                      XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}

source %{
  // Float masks come from different places depending on platform.
479 #ifdef _LP64 480 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 481 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 482 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 483 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 484 #else 485 static address float_signmask() { return (address)float_signmask_pool; } 486 static address float_signflip() { return (address)float_signflip_pool; } 487 static address double_signmask() { return (address)double_signmask_pool; } 488 static address double_signflip() { return (address)double_signflip_pool; } 489 #endif 490 491 // Map Types to machine register types 492 const int Matcher::base2reg[Type::lastype] = { 493 Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, 494 Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ 495 Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */ 496 Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ 497 0, 0/*abio*/, 498 Op_RegP /* Return address */, 0, /* the memories */ 499 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, 500 0 /*bottom*/ 501 }; 502 503 const bool Matcher::match_rule_supported(int opcode) { 504 if (!has_match_rule(opcode)) 505 return false; 506 507 switch (opcode) { 508 case Op_PopCountI: 509 case Op_PopCountL: 510 if (!UsePopCountInstruction) 511 return false; 512 case Op_MulVI: 513 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 514 return false; 515 break; 516 } 517 518 return true; // Per default match rules are supported. 519 } 520 521 // Max vector size in bytes. 0 if not supported. 522 const int Matcher::vector_width_in_bytes(BasicType bt) { 523 assert(is_java_primitive(bt), "only primitive type vectors"); 524 if (UseSSE < 2) return 0; 525 // SSE2 supports 128bit vectors for all types. 526 // AVX2 supports 256bit vectors for all types. 527 int size = (UseAVX > 1) ? 
32 : 16; 528 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 529 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 530 size = 32; 531 // Use flag to limit vector size. 532 size = MIN2(size,(int)MaxVectorSize); 533 // Minimum 2 values in vector (or 4 for bytes). 534 switch (bt) { 535 case T_DOUBLE: 536 case T_LONG: 537 if (size < 16) return 0; 538 case T_FLOAT: 539 case T_INT: 540 if (size < 8) return 0; 541 case T_BOOLEAN: 542 case T_BYTE: 543 case T_CHAR: 544 case T_SHORT: 545 if (size < 4) return 0; 546 break; 547 default: 548 ShouldNotReachHere(); 549 } 550 return size; 551 } 552 553 // Limits on vector size (number of elements) loaded into vector. 554 const int Matcher::max_vector_size(const BasicType bt) { 555 return vector_width_in_bytes(bt)/type2aelembytes(bt); 556 } 557 const int Matcher::min_vector_size(const BasicType bt) { 558 int max_size = max_vector_size(bt); 559 // Min size which can be loaded into vector is 4 bytes. 560 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 561 return MIN2(size,max_size); 562 } 563 564 // Vector ideal reg corresponding to specidied size in bytes 565 const int Matcher::vector_ideal_reg(int size) { 566 assert(MaxVectorSize >= size, ""); 567 switch(size) { 568 case 4: return Op_VecS; 569 case 8: return Op_VecD; 570 case 16: return Op_VecX; 571 case 32: return Op_VecY; 572 } 573 ShouldNotReachHere(); 574 return 0; 575 } 576 577 // x86 supports misaligned vectors store/load. 578 const bool Matcher::misaligned_vectors_ok() { 579 return !AlignVector; // can be changed by flag 580 } 581 582 // Helper methods for MachSpillCopyNode::implementation(). 583 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 584 int src_hi, int dst_hi, uint ireg, outputStream* st) { 585 // In 64-bit VM size calculation is very complex. Emitting instructions 586 // into scratch buffer is used to get size in 64-bit VM. 
587 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 588 assert(ireg == Op_VecS || // 32bit vector 589 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 590 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 591 "no non-adjacent vector moves" ); 592 if (cbuf) { 593 MacroAssembler _masm(cbuf); 594 int offset = __ offset(); 595 switch (ireg) { 596 case Op_VecS: // copy whole register 597 case Op_VecD: 598 case Op_VecX: 599 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 600 break; 601 case Op_VecY: 602 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 603 break; 604 default: 605 ShouldNotReachHere(); 606 } 607 int size = __ offset() - offset; 608 #ifdef ASSERT 609 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 610 assert(!do_size || size == 4, "incorrect size calculattion"); 611 #endif 612 return size; 613 #ifndef PRODUCT 614 } else if (!do_size) { 615 switch (ireg) { 616 case Op_VecS: 617 case Op_VecD: 618 case Op_VecX: 619 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 620 break; 621 case Op_VecY: 622 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 623 break; 624 default: 625 ShouldNotReachHere(); 626 } 627 #endif 628 } 629 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 630 return 4; 631 } 632 633 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 634 int stack_offset, int reg, uint ireg, outputStream* st) { 635 // In 64-bit VM size calculation is very complex. Emitting instructions 636 // into scratch buffer is used to get size in 64-bit VM. 
637 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 638 if (cbuf) { 639 MacroAssembler _masm(cbuf); 640 int offset = __ offset(); 641 if (is_load) { 642 switch (ireg) { 643 case Op_VecS: 644 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 645 break; 646 case Op_VecD: 647 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 648 break; 649 case Op_VecX: 650 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 651 break; 652 case Op_VecY: 653 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 654 break; 655 default: 656 ShouldNotReachHere(); 657 } 658 } else { // store 659 switch (ireg) { 660 case Op_VecS: 661 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 662 break; 663 case Op_VecD: 664 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 665 break; 666 case Op_VecX: 667 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 668 break; 669 case Op_VecY: 670 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 671 break; 672 default: 673 ShouldNotReachHere(); 674 } 675 } 676 int size = __ offset() - offset; 677 #ifdef ASSERT 678 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 679 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
680 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 681 #endif 682 return size; 683 #ifndef PRODUCT 684 } else if (!do_size) { 685 if (is_load) { 686 switch (ireg) { 687 case Op_VecS: 688 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 689 break; 690 case Op_VecD: 691 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 692 break; 693 case Op_VecX: 694 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 695 break; 696 case Op_VecY: 697 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 698 break; 699 default: 700 ShouldNotReachHere(); 701 } 702 } else { // store 703 switch (ireg) { 704 case Op_VecS: 705 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 706 break; 707 case Op_VecD: 708 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 709 break; 710 case Op_VecX: 711 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 712 break; 713 case Op_VecY: 714 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 715 break; 716 default: 717 ShouldNotReachHere(); 718 } 719 } 720 #endif 721 } 722 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 723 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 724 return 5+offset_size; 725 } 726 727 static inline jfloat replicate4_imm(int con, int width) { 728 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 
729 assert(width == 1 || width == 2, "only byte or short types here"); 730 int bit_width = width * 8; 731 jint val = con; 732 val &= (1 << bit_width) - 1; // mask off sign bits 733 while(bit_width < 32) { 734 val |= (val << bit_width); 735 bit_width <<= 1; 736 } 737 jfloat fval = *((jfloat*) &val); // coerce to float type 738 return fval; 739 } 740 741 static inline jdouble replicate8_imm(int con, int width) { 742 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 743 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 744 int bit_width = width * 8; 745 jlong val = con; 746 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 747 while(bit_width < 64) { 748 val |= (val << bit_width); 749 bit_width <<= 1; 750 } 751 jdouble dval = *((jdouble*) &val); // coerce to double type 752 return dval; 753 } 754 755 #ifndef PRODUCT 756 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 757 st->print("nop \t# %d bytes pad for loops and calls", _count); 758 } 759 #endif 760 761 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 762 MacroAssembler _masm(&cbuf); 763 __ nop(_count); 764 } 765 766 uint MachNopNode::size(PhaseRegAlloc*) const { 767 return _count; 768 } 769 770 #ifndef PRODUCT 771 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 772 st->print("# breakpoint"); 773 } 774 #endif 775 776 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 777 MacroAssembler _masm(&cbuf); 778 __ int3(); 779 } 780 781 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 782 return MachNode::size(ra_); 783 } 784 785 %} 786 787 encode %{ 788 789 enc_class preserve_SP %{ 790 debug_only(int off0 = cbuf.insts_size()); 791 MacroAssembler _masm(&cbuf); 792 // RBP is preserved across all calls, even compiled calls. 793 // Use it to preserve RSP in places where the callee might change the SP. 
794 __ movptr(rbp_mh_SP_save, rsp); 795 debug_only(int off1 = cbuf.insts_size()); 796 assert(off1 - off0 == preserve_SP_size(), "correct size prediction"); 797 %} 798 799 enc_class restore_SP %{ 800 MacroAssembler _masm(&cbuf); 801 __ movptr(rsp, rbp_mh_SP_save); 802 %} 803 804 enc_class call_epilog %{ 805 if (VerifyStackAtCalls) { 806 // Check that stack depth is unchanged: find majik cookie on stack 807 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 808 MacroAssembler _masm(&cbuf); 809 Label L; 810 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 811 __ jccb(Assembler::equal, L); 812 // Die if stack mismatch 813 __ int3(); 814 __ bind(L); 815 } 816 %} 817 818 %} 819 820 821 //----------OPERANDS----------------------------------------------------------- 822 // Operand definitions must precede instruction definitions for correct parsing 823 // in the ADLC because operands constitute user defined types which are used in 824 // instruction definitions. 

// Vectors
// One register operand per ideal vector type. Each operand is allocated from
// its own register class and has an empty format. In the load/store rules of
// this file vecS/vecD/vecX/vecY are paired with 4/8/16/32-byte accesses.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// A Halt node is emitted as a single int3.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// Scalar float add, two-operand SSE form (dst = dst + src).
// Selected only when UseSSE >= 1 and AVX is off.
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Same as addF_reg, with the LoadF of the second input folded into the
// memory operand of addss.
instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Same as addF_reg, with a float immediate as the second input; the
// immediate is read from the constant table via $constantaddress.
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF
dst, regF src1, regF src2) %{ 911 predicate(UseAVX > 0); 912 match(Set dst (AddF src1 src2)); 913 914 format %{ "vaddss $dst, $src1, $src2" %} 915 ins_cost(150); 916 ins_encode %{ 917 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 918 %} 919 ins_pipe(pipe_slow); 920 %} 921 922 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 923 predicate(UseAVX > 0); 924 match(Set dst (AddF src1 (LoadF src2))); 925 926 format %{ "vaddss $dst, $src1, $src2" %} 927 ins_cost(150); 928 ins_encode %{ 929 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 930 %} 931 ins_pipe(pipe_slow); 932 %} 933 934 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 935 predicate(UseAVX > 0); 936 match(Set dst (AddF src con)); 937 938 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 939 ins_cost(150); 940 ins_encode %{ 941 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 942 %} 943 ins_pipe(pipe_slow); 944 %} 945 946 instruct addD_reg(regD dst, regD src) %{ 947 predicate((UseSSE>=2) && (UseAVX == 0)); 948 match(Set dst (AddD dst src)); 949 950 format %{ "addsd $dst, $src" %} 951 ins_cost(150); 952 ins_encode %{ 953 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 954 %} 955 ins_pipe(pipe_slow); 956 %} 957 958 instruct addD_mem(regD dst, memory src) %{ 959 predicate((UseSSE>=2) && (UseAVX == 0)); 960 match(Set dst (AddD dst (LoadD src))); 961 962 format %{ "addsd $dst, $src" %} 963 ins_cost(150); 964 ins_encode %{ 965 __ addsd($dst$$XMMRegister, $src$$Address); 966 %} 967 ins_pipe(pipe_slow); 968 %} 969 970 instruct addD_imm(regD dst, immD con) %{ 971 predicate((UseSSE>=2) && (UseAVX == 0)); 972 match(Set dst (AddD dst con)); 973 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 974 ins_cost(150); 975 ins_encode %{ 976 __ addsd($dst$$XMMRegister, $constantaddress($con)); 977 %} 978 ins_pipe(pipe_slow); 979 %} 980 981 instruct addD_reg_reg(regD dst, 
regD src1, regD src2) %{ 982 predicate(UseAVX > 0); 983 match(Set dst (AddD src1 src2)); 984 985 format %{ "vaddsd $dst, $src1, $src2" %} 986 ins_cost(150); 987 ins_encode %{ 988 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 989 %} 990 ins_pipe(pipe_slow); 991 %} 992 993 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 994 predicate(UseAVX > 0); 995 match(Set dst (AddD src1 (LoadD src2))); 996 997 format %{ "vaddsd $dst, $src1, $src2" %} 998 ins_cost(150); 999 ins_encode %{ 1000 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1001 %} 1002 ins_pipe(pipe_slow); 1003 %} 1004 1005 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1006 predicate(UseAVX > 0); 1007 match(Set dst (AddD src con)); 1008 1009 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1010 ins_cost(150); 1011 ins_encode %{ 1012 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1013 %} 1014 ins_pipe(pipe_slow); 1015 %} 1016 1017 instruct subF_reg(regF dst, regF src) %{ 1018 predicate((UseSSE>=1) && (UseAVX == 0)); 1019 match(Set dst (SubF dst src)); 1020 1021 format %{ "subss $dst, $src" %} 1022 ins_cost(150); 1023 ins_encode %{ 1024 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1025 %} 1026 ins_pipe(pipe_slow); 1027 %} 1028 1029 instruct subF_mem(regF dst, memory src) %{ 1030 predicate((UseSSE>=1) && (UseAVX == 0)); 1031 match(Set dst (SubF dst (LoadF src))); 1032 1033 format %{ "subss $dst, $src" %} 1034 ins_cost(150); 1035 ins_encode %{ 1036 __ subss($dst$$XMMRegister, $src$$Address); 1037 %} 1038 ins_pipe(pipe_slow); 1039 %} 1040 1041 instruct subF_imm(regF dst, immF con) %{ 1042 predicate((UseSSE>=1) && (UseAVX == 0)); 1043 match(Set dst (SubF dst con)); 1044 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1045 ins_cost(150); 1046 ins_encode %{ 1047 __ subss($dst$$XMMRegister, $constantaddress($con)); 1048 %} 1049 ins_pipe(pipe_slow); 
1050 %} 1051 1052 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 1053 predicate(UseAVX > 0); 1054 match(Set dst (SubF src1 src2)); 1055 1056 format %{ "vsubss $dst, $src1, $src2" %} 1057 ins_cost(150); 1058 ins_encode %{ 1059 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1060 %} 1061 ins_pipe(pipe_slow); 1062 %} 1063 1064 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 1065 predicate(UseAVX > 0); 1066 match(Set dst (SubF src1 (LoadF src2))); 1067 1068 format %{ "vsubss $dst, $src1, $src2" %} 1069 ins_cost(150); 1070 ins_encode %{ 1071 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1072 %} 1073 ins_pipe(pipe_slow); 1074 %} 1075 1076 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 1077 predicate(UseAVX > 0); 1078 match(Set dst (SubF src con)); 1079 1080 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1081 ins_cost(150); 1082 ins_encode %{ 1083 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1084 %} 1085 ins_pipe(pipe_slow); 1086 %} 1087 1088 instruct subD_reg(regD dst, regD src) %{ 1089 predicate((UseSSE>=2) && (UseAVX == 0)); 1090 match(Set dst (SubD dst src)); 1091 1092 format %{ "subsd $dst, $src" %} 1093 ins_cost(150); 1094 ins_encode %{ 1095 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 1096 %} 1097 ins_pipe(pipe_slow); 1098 %} 1099 1100 instruct subD_mem(regD dst, memory src) %{ 1101 predicate((UseSSE>=2) && (UseAVX == 0)); 1102 match(Set dst (SubD dst (LoadD src))); 1103 1104 format %{ "subsd $dst, $src" %} 1105 ins_cost(150); 1106 ins_encode %{ 1107 __ subsd($dst$$XMMRegister, $src$$Address); 1108 %} 1109 ins_pipe(pipe_slow); 1110 %} 1111 1112 instruct subD_imm(regD dst, immD con) %{ 1113 predicate((UseSSE>=2) && (UseAVX == 0)); 1114 match(Set dst (SubD dst con)); 1115 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1116 ins_cost(150); 1117 ins_encode %{ 1118 __ 
subsd($dst$$XMMRegister, $constantaddress($con)); 1119 %} 1120 ins_pipe(pipe_slow); 1121 %} 1122 1123 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 1124 predicate(UseAVX > 0); 1125 match(Set dst (SubD src1 src2)); 1126 1127 format %{ "vsubsd $dst, $src1, $src2" %} 1128 ins_cost(150); 1129 ins_encode %{ 1130 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1131 %} 1132 ins_pipe(pipe_slow); 1133 %} 1134 1135 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 1136 predicate(UseAVX > 0); 1137 match(Set dst (SubD src1 (LoadD src2))); 1138 1139 format %{ "vsubsd $dst, $src1, $src2" %} 1140 ins_cost(150); 1141 ins_encode %{ 1142 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1143 %} 1144 ins_pipe(pipe_slow); 1145 %} 1146 1147 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 1148 predicate(UseAVX > 0); 1149 match(Set dst (SubD src con)); 1150 1151 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1152 ins_cost(150); 1153 ins_encode %{ 1154 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1155 %} 1156 ins_pipe(pipe_slow); 1157 %} 1158 1159 instruct mulF_reg(regF dst, regF src) %{ 1160 predicate((UseSSE>=1) && (UseAVX == 0)); 1161 match(Set dst (MulF dst src)); 1162 1163 format %{ "mulss $dst, $src" %} 1164 ins_cost(150); 1165 ins_encode %{ 1166 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 1167 %} 1168 ins_pipe(pipe_slow); 1169 %} 1170 1171 instruct mulF_mem(regF dst, memory src) %{ 1172 predicate((UseSSE>=1) && (UseAVX == 0)); 1173 match(Set dst (MulF dst (LoadF src))); 1174 1175 format %{ "mulss $dst, $src" %} 1176 ins_cost(150); 1177 ins_encode %{ 1178 __ mulss($dst$$XMMRegister, $src$$Address); 1179 %} 1180 ins_pipe(pipe_slow); 1181 %} 1182 1183 instruct mulF_imm(regF dst, immF con) %{ 1184 predicate((UseSSE>=1) && (UseAVX == 0)); 1185 match(Set dst (MulF dst con)); 1186 format %{ "mulss $dst, [$constantaddress]\t# load from constant 
table: float=$con" %} 1187 ins_cost(150); 1188 ins_encode %{ 1189 __ mulss($dst$$XMMRegister, $constantaddress($con)); 1190 %} 1191 ins_pipe(pipe_slow); 1192 %} 1193 1194 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 1195 predicate(UseAVX > 0); 1196 match(Set dst (MulF src1 src2)); 1197 1198 format %{ "vmulss $dst, $src1, $src2" %} 1199 ins_cost(150); 1200 ins_encode %{ 1201 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1202 %} 1203 ins_pipe(pipe_slow); 1204 %} 1205 1206 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 1207 predicate(UseAVX > 0); 1208 match(Set dst (MulF src1 (LoadF src2))); 1209 1210 format %{ "vmulss $dst, $src1, $src2" %} 1211 ins_cost(150); 1212 ins_encode %{ 1213 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1214 %} 1215 ins_pipe(pipe_slow); 1216 %} 1217 1218 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 1219 predicate(UseAVX > 0); 1220 match(Set dst (MulF src con)); 1221 1222 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1223 ins_cost(150); 1224 ins_encode %{ 1225 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1226 %} 1227 ins_pipe(pipe_slow); 1228 %} 1229 1230 instruct mulD_reg(regD dst, regD src) %{ 1231 predicate((UseSSE>=2) && (UseAVX == 0)); 1232 match(Set dst (MulD dst src)); 1233 1234 format %{ "mulsd $dst, $src" %} 1235 ins_cost(150); 1236 ins_encode %{ 1237 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 1238 %} 1239 ins_pipe(pipe_slow); 1240 %} 1241 1242 instruct mulD_mem(regD dst, memory src) %{ 1243 predicate((UseSSE>=2) && (UseAVX == 0)); 1244 match(Set dst (MulD dst (LoadD src))); 1245 1246 format %{ "mulsd $dst, $src" %} 1247 ins_cost(150); 1248 ins_encode %{ 1249 __ mulsd($dst$$XMMRegister, $src$$Address); 1250 %} 1251 ins_pipe(pipe_slow); 1252 %} 1253 1254 instruct mulD_imm(regD dst, immD con) %{ 1255 predicate((UseSSE>=2) && (UseAVX == 0)); 1256 match(Set dst (MulD dst con)); 1257 
format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1258 ins_cost(150); 1259 ins_encode %{ 1260 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 1261 %} 1262 ins_pipe(pipe_slow); 1263 %} 1264 1265 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 1266 predicate(UseAVX > 0); 1267 match(Set dst (MulD src1 src2)); 1268 1269 format %{ "vmulsd $dst, $src1, $src2" %} 1270 ins_cost(150); 1271 ins_encode %{ 1272 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1273 %} 1274 ins_pipe(pipe_slow); 1275 %} 1276 1277 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 1278 predicate(UseAVX > 0); 1279 match(Set dst (MulD src1 (LoadD src2))); 1280 1281 format %{ "vmulsd $dst, $src1, $src2" %} 1282 ins_cost(150); 1283 ins_encode %{ 1284 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1285 %} 1286 ins_pipe(pipe_slow); 1287 %} 1288 1289 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 1290 predicate(UseAVX > 0); 1291 match(Set dst (MulD src con)); 1292 1293 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1294 ins_cost(150); 1295 ins_encode %{ 1296 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1297 %} 1298 ins_pipe(pipe_slow); 1299 %} 1300 1301 instruct divF_reg(regF dst, regF src) %{ 1302 predicate((UseSSE>=1) && (UseAVX == 0)); 1303 match(Set dst (DivF dst src)); 1304 1305 format %{ "divss $dst, $src" %} 1306 ins_cost(150); 1307 ins_encode %{ 1308 __ divss($dst$$XMMRegister, $src$$XMMRegister); 1309 %} 1310 ins_pipe(pipe_slow); 1311 %} 1312 1313 instruct divF_mem(regF dst, memory src) %{ 1314 predicate((UseSSE>=1) && (UseAVX == 0)); 1315 match(Set dst (DivF dst (LoadF src))); 1316 1317 format %{ "divss $dst, $src" %} 1318 ins_cost(150); 1319 ins_encode %{ 1320 __ divss($dst$$XMMRegister, $src$$Address); 1321 %} 1322 ins_pipe(pipe_slow); 1323 %} 1324 1325 instruct divF_imm(regF dst, immF con) %{ 1326 
predicate((UseSSE>=1) && (UseAVX == 0)); 1327 match(Set dst (DivF dst con)); 1328 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1329 ins_cost(150); 1330 ins_encode %{ 1331 __ divss($dst$$XMMRegister, $constantaddress($con)); 1332 %} 1333 ins_pipe(pipe_slow); 1334 %} 1335 1336 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 1337 predicate(UseAVX > 0); 1338 match(Set dst (DivF src1 src2)); 1339 1340 format %{ "vdivss $dst, $src1, $src2" %} 1341 ins_cost(150); 1342 ins_encode %{ 1343 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1344 %} 1345 ins_pipe(pipe_slow); 1346 %} 1347 1348 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 1349 predicate(UseAVX > 0); 1350 match(Set dst (DivF src1 (LoadF src2))); 1351 1352 format %{ "vdivss $dst, $src1, $src2" %} 1353 ins_cost(150); 1354 ins_encode %{ 1355 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1356 %} 1357 ins_pipe(pipe_slow); 1358 %} 1359 1360 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 1361 predicate(UseAVX > 0); 1362 match(Set dst (DivF src con)); 1363 1364 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1365 ins_cost(150); 1366 ins_encode %{ 1367 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1368 %} 1369 ins_pipe(pipe_slow); 1370 %} 1371 1372 instruct divD_reg(regD dst, regD src) %{ 1373 predicate((UseSSE>=2) && (UseAVX == 0)); 1374 match(Set dst (DivD dst src)); 1375 1376 format %{ "divsd $dst, $src" %} 1377 ins_cost(150); 1378 ins_encode %{ 1379 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 1380 %} 1381 ins_pipe(pipe_slow); 1382 %} 1383 1384 instruct divD_mem(regD dst, memory src) %{ 1385 predicate((UseSSE>=2) && (UseAVX == 0)); 1386 match(Set dst (DivD dst (LoadD src))); 1387 1388 format %{ "divsd $dst, $src" %} 1389 ins_cost(150); 1390 ins_encode %{ 1391 __ divsd($dst$$XMMRegister, $src$$Address); 1392 %} 1393 ins_pipe(pipe_slow); 
1394 %} 1395 1396 instruct divD_imm(regD dst, immD con) %{ 1397 predicate((UseSSE>=2) && (UseAVX == 0)); 1398 match(Set dst (DivD dst con)); 1399 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1400 ins_cost(150); 1401 ins_encode %{ 1402 __ divsd($dst$$XMMRegister, $constantaddress($con)); 1403 %} 1404 ins_pipe(pipe_slow); 1405 %} 1406 1407 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 1408 predicate(UseAVX > 0); 1409 match(Set dst (DivD src1 src2)); 1410 1411 format %{ "vdivsd $dst, $src1, $src2" %} 1412 ins_cost(150); 1413 ins_encode %{ 1414 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1415 %} 1416 ins_pipe(pipe_slow); 1417 %} 1418 1419 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 1420 predicate(UseAVX > 0); 1421 match(Set dst (DivD src1 (LoadD src2))); 1422 1423 format %{ "vdivsd $dst, $src1, $src2" %} 1424 ins_cost(150); 1425 ins_encode %{ 1426 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1427 %} 1428 ins_pipe(pipe_slow); 1429 %} 1430 1431 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 1432 predicate(UseAVX > 0); 1433 match(Set dst (DivD src con)); 1434 1435 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1436 ins_cost(150); 1437 ins_encode %{ 1438 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1439 %} 1440 ins_pipe(pipe_slow); 1441 %} 1442 1443 instruct absF_reg(regF dst) %{ 1444 predicate((UseSSE>=1) && (UseAVX == 0)); 1445 match(Set dst (AbsF dst)); 1446 ins_cost(150); 1447 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1448 ins_encode %{ 1449 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1450 %} 1451 ins_pipe(pipe_slow); 1452 %} 1453 1454 instruct absF_reg_reg(regF dst, regF src) %{ 1455 predicate(UseAVX > 0); 1456 match(Set dst (AbsF src)); 1457 ins_cost(150); 1458 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign 
masking" %} 1459 ins_encode %{ 1460 bool vector256 = false; 1461 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1462 ExternalAddress(float_signmask()), vector256); 1463 %} 1464 ins_pipe(pipe_slow); 1465 %} 1466 1467 instruct absD_reg(regD dst) %{ 1468 predicate((UseSSE>=2) && (UseAVX == 0)); 1469 match(Set dst (AbsD dst)); 1470 ins_cost(150); 1471 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1472 "# abs double by sign masking" %} 1473 ins_encode %{ 1474 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1475 %} 1476 ins_pipe(pipe_slow); 1477 %} 1478 1479 instruct absD_reg_reg(regD dst, regD src) %{ 1480 predicate(UseAVX > 0); 1481 match(Set dst (AbsD src)); 1482 ins_cost(150); 1483 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1484 "# abs double by sign masking" %} 1485 ins_encode %{ 1486 bool vector256 = false; 1487 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1488 ExternalAddress(double_signmask()), vector256); 1489 %} 1490 ins_pipe(pipe_slow); 1491 %} 1492 1493 instruct negF_reg(regF dst) %{ 1494 predicate((UseSSE>=1) && (UseAVX == 0)); 1495 match(Set dst (NegF dst)); 1496 ins_cost(150); 1497 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1498 ins_encode %{ 1499 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1500 %} 1501 ins_pipe(pipe_slow); 1502 %} 1503 1504 instruct negF_reg_reg(regF dst, regF src) %{ 1505 predicate(UseAVX > 0); 1506 match(Set dst (NegF src)); 1507 ins_cost(150); 1508 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1509 ins_encode %{ 1510 bool vector256 = false; 1511 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1512 ExternalAddress(float_signflip()), vector256); 1513 %} 1514 ins_pipe(pipe_slow); 1515 %} 1516 1517 instruct negD_reg(regD dst) %{ 1518 predicate((UseSSE>=2) && (UseAVX == 0)); 1519 match(Set dst (NegD dst)); 1520 ins_cost(150); 1521 format %{ "xorpd $dst, [0x8000000000000000]\t" 1522 "# neg double by sign flipping" %} 1523 ins_encode 
%{ 1524 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1525 %} 1526 ins_pipe(pipe_slow); 1527 %} 1528 1529 instruct negD_reg_reg(regD dst, regD src) %{ 1530 predicate(UseAVX > 0); 1531 match(Set dst (NegD src)); 1532 ins_cost(150); 1533 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1534 "# neg double by sign flipping" %} 1535 ins_encode %{ 1536 bool vector256 = false; 1537 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1538 ExternalAddress(double_signflip()), vector256); 1539 %} 1540 ins_pipe(pipe_slow); 1541 %} 1542 1543 instruct sqrtF_reg(regF dst, regF src) %{ 1544 predicate(UseSSE>=1); 1545 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1546 1547 format %{ "sqrtss $dst, $src" %} 1548 ins_cost(150); 1549 ins_encode %{ 1550 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1551 %} 1552 ins_pipe(pipe_slow); 1553 %} 1554 1555 instruct sqrtF_mem(regF dst, memory src) %{ 1556 predicate(UseSSE>=1); 1557 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 1558 1559 format %{ "sqrtss $dst, $src" %} 1560 ins_cost(150); 1561 ins_encode %{ 1562 __ sqrtss($dst$$XMMRegister, $src$$Address); 1563 %} 1564 ins_pipe(pipe_slow); 1565 %} 1566 1567 instruct sqrtF_imm(regF dst, immF con) %{ 1568 predicate(UseSSE>=1); 1569 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 1570 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1571 ins_cost(150); 1572 ins_encode %{ 1573 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 1574 %} 1575 ins_pipe(pipe_slow); 1576 %} 1577 1578 instruct sqrtD_reg(regD dst, regD src) %{ 1579 predicate(UseSSE>=2); 1580 match(Set dst (SqrtD src)); 1581 1582 format %{ "sqrtsd $dst, $src" %} 1583 ins_cost(150); 1584 ins_encode %{ 1585 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 1586 %} 1587 ins_pipe(pipe_slow); 1588 %} 1589 1590 instruct sqrtD_mem(regD dst, memory src) %{ 1591 predicate(UseSSE>=2); 1592 match(Set dst (SqrtD (LoadD src))); 1593 1594 format %{ "sqrtsd $dst, $src" %} 1595 
ins_cost(150); 1596 ins_encode %{ 1597 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1598 %} 1599 ins_pipe(pipe_slow); 1600 %} 1601 1602 instruct sqrtD_imm(regD dst, immD con) %{ 1603 predicate(UseSSE>=2); 1604 match(Set dst (SqrtD con)); 1605 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1606 ins_cost(150); 1607 ins_encode %{ 1608 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1609 %} 1610 ins_pipe(pipe_slow); 1611 %} 1612 1613 1614 // ====================VECTOR INSTRUCTIONS===================================== 1615 1616 // Load vectors (4 bytes long) 1617 instruct loadV4(vecS dst, memory mem) %{ 1618 predicate(n->as_LoadVector()->memory_size() == 4); 1619 match(Set dst (LoadVector mem)); 1620 ins_cost(125); 1621 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1622 ins_encode %{ 1623 __ movdl($dst$$XMMRegister, $mem$$Address); 1624 %} 1625 ins_pipe( pipe_slow ); 1626 %} 1627 1628 // Load vectors (8 bytes long) 1629 instruct loadV8(vecD dst, memory mem) %{ 1630 predicate(n->as_LoadVector()->memory_size() == 8); 1631 match(Set dst (LoadVector mem)); 1632 ins_cost(125); 1633 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 1634 ins_encode %{ 1635 __ movq($dst$$XMMRegister, $mem$$Address); 1636 %} 1637 ins_pipe( pipe_slow ); 1638 %} 1639 1640 // Load vectors (16 bytes long) 1641 instruct loadV16(vecX dst, memory mem) %{ 1642 predicate(n->as_LoadVector()->memory_size() == 16); 1643 match(Set dst (LoadVector mem)); 1644 ins_cost(125); 1645 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 1646 ins_encode %{ 1647 __ movdqu($dst$$XMMRegister, $mem$$Address); 1648 %} 1649 ins_pipe( pipe_slow ); 1650 %} 1651 1652 // Load vectors (32 bytes long) 1653 instruct loadV32(vecY dst, memory mem) %{ 1654 predicate(n->as_LoadVector()->memory_size() == 32); 1655 match(Set dst (LoadVector mem)); 1656 ins_cost(125); 1657 format %{ "vmovdqu $dst,$mem\t! 
load vector (32 bytes)" %} 1658 ins_encode %{ 1659 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 1660 %} 1661 ins_pipe( pipe_slow ); 1662 %} 1663 1664 // Store vectors 1665 instruct storeV4(memory mem, vecS src) %{ 1666 predicate(n->as_StoreVector()->memory_size() == 4); 1667 match(Set mem (StoreVector mem src)); 1668 ins_cost(145); 1669 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 1670 ins_encode %{ 1671 __ movdl($mem$$Address, $src$$XMMRegister); 1672 %} 1673 ins_pipe( pipe_slow ); 1674 %} 1675 1676 instruct storeV8(memory mem, vecD src) %{ 1677 predicate(n->as_StoreVector()->memory_size() == 8); 1678 match(Set mem (StoreVector mem src)); 1679 ins_cost(145); 1680 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1681 ins_encode %{ 1682 __ movq($mem$$Address, $src$$XMMRegister); 1683 %} 1684 ins_pipe( pipe_slow ); 1685 %} 1686 1687 instruct storeV16(memory mem, vecX src) %{ 1688 predicate(n->as_StoreVector()->memory_size() == 16); 1689 match(Set mem (StoreVector mem src)); 1690 ins_cost(145); 1691 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 1692 ins_encode %{ 1693 __ movdqu($mem$$Address, $src$$XMMRegister); 1694 %} 1695 ins_pipe( pipe_slow ); 1696 %} 1697 1698 instruct storeV32(memory mem, vecY src) %{ 1699 predicate(n->as_StoreVector()->memory_size() == 32); 1700 match(Set mem (StoreVector mem src)); 1701 ins_cost(145); 1702 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1703 ins_encode %{ 1704 __ vmovdqu($mem$$Address, $src$$XMMRegister); 1705 %} 1706 ins_pipe( pipe_slow ); 1707 %} 1708 1709 // Replicate byte scalar to be vector 1710 instruct Repl4B(vecS dst, rRegI src) %{ 1711 predicate(n->as_Vector()->length() == 4); 1712 match(Set dst (ReplicateB src)); 1713 format %{ "movd $dst,$src\n\t" 1714 "punpcklbw $dst,$dst\n\t" 1715 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 1716 ins_encode %{ 1717 __ movdl($dst$$XMMRegister, $src$$Register); 1718 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1719 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1720 %} 1721 ins_pipe( pipe_slow ); 1722 %} 1723 1724 instruct Repl8B(vecD dst, rRegI src) %{ 1725 predicate(n->as_Vector()->length() == 8); 1726 match(Set dst (ReplicateB src)); 1727 format %{ "movd $dst,$src\n\t" 1728 "punpcklbw $dst,$dst\n\t" 1729 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1730 ins_encode %{ 1731 __ movdl($dst$$XMMRegister, $src$$Register); 1732 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1733 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1734 %} 1735 ins_pipe( pipe_slow ); 1736 %} 1737 1738 instruct Repl16B(vecX dst, rRegI src) %{ 1739 predicate(n->as_Vector()->length() == 16); 1740 match(Set dst (ReplicateB src)); 1741 format %{ "movd $dst,$src\n\t" 1742 "punpcklbw $dst,$dst\n\t" 1743 "pshuflw $dst,$dst,0x00\n\t" 1744 "punpcklqdq $dst,$dst\t! replicate16B" %} 1745 ins_encode %{ 1746 __ movdl($dst$$XMMRegister, $src$$Register); 1747 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1748 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1749 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1750 %} 1751 ins_pipe( pipe_slow ); 1752 %} 1753 1754 instruct Repl32B(vecY dst, rRegI src) %{ 1755 predicate(n->as_Vector()->length() == 32); 1756 match(Set dst (ReplicateB src)); 1757 format %{ "movd $dst,$src\n\t" 1758 "punpcklbw $dst,$dst\n\t" 1759 "pshuflw $dst,$dst,0x00\n\t" 1760 "punpcklqdq $dst,$dst\n\t" 1761 "vinserti128h $dst,$dst,$dst\t! 
replicate32B" %} 1762 ins_encode %{ 1763 __ movdl($dst$$XMMRegister, $src$$Register); 1764 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1765 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1766 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1767 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1768 %} 1769 ins_pipe( pipe_slow ); 1770 %} 1771 1772 // Replicate byte scalar immediate to be vector by loading from const table. 1773 instruct Repl4B_imm(vecS dst, immI con) %{ 1774 predicate(n->as_Vector()->length() == 4); 1775 match(Set dst (ReplicateB con)); 1776 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1777 ins_encode %{ 1778 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1779 %} 1780 ins_pipe( pipe_slow ); 1781 %} 1782 1783 instruct Repl8B_imm(vecD dst, immI con) %{ 1784 predicate(n->as_Vector()->length() == 8); 1785 match(Set dst (ReplicateB con)); 1786 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 1787 ins_encode %{ 1788 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1789 %} 1790 ins_pipe( pipe_slow ); 1791 %} 1792 1793 instruct Repl16B_imm(vecX dst, immI con) %{ 1794 predicate(n->as_Vector()->length() == 16); 1795 match(Set dst (ReplicateB con)); 1796 format %{ "movq $dst,[$constantaddress]\n\t" 1797 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 1798 ins_encode %{ 1799 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1800 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1801 %} 1802 ins_pipe( pipe_slow ); 1803 %} 1804 1805 instruct Repl32B_imm(vecY dst, immI con) %{ 1806 predicate(n->as_Vector()->length() == 32); 1807 match(Set dst (ReplicateB con)); 1808 format %{ "movq $dst,[$constantaddress]\n\t" 1809 "punpcklqdq $dst,$dst\n\t" 1810 "vinserti128h $dst,$dst,$dst\t! 
lreplicate32B($con)" %} 1811 ins_encode %{ 1812 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1813 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1814 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1815 %} 1816 ins_pipe( pipe_slow ); 1817 %} 1818 1819 // Replicate byte scalar zero to be vector 1820 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 1821 predicate(n->as_Vector()->length() == 4); 1822 match(Set dst (ReplicateB zero)); 1823 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 1824 ins_encode %{ 1825 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1826 %} 1827 ins_pipe( fpu_reg_reg ); 1828 %} 1829 1830 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 1831 predicate(n->as_Vector()->length() == 8); 1832 match(Set dst (ReplicateB zero)); 1833 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 1834 ins_encode %{ 1835 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1836 %} 1837 ins_pipe( fpu_reg_reg ); 1838 %} 1839 1840 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1841 predicate(n->as_Vector()->length() == 16); 1842 match(Set dst (ReplicateB zero)); 1843 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 1844 ins_encode %{ 1845 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1846 %} 1847 ins_pipe( fpu_reg_reg ); 1848 %} 1849 1850 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1851 predicate(n->as_Vector()->length() == 32); 1852 match(Set dst (ReplicateB zero)); 1853 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 1854 ins_encode %{ 1855 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
// (inside ins_encode of Repl32B_zero) vector256 is passed as vpxor's last argument.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
// Common recipe: movd moves $src into the low dword, then pshuflw 0x00 copies
// word 0 into the four low words. The wider forms add punpcklqdq and, for the
// vecY form, vinserti128h.

// 2 shorts in a vecS register.
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4 shorts in a vecD register; same two instructions as Repl2S.
instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 8 shorts in a vecX register; punpcklqdq duplicates the low quadword.
instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts in a vecY register; as Repl8S plus vinserti128h on the same register.
instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    // NOTE(review): presumably fills the upper 128 bits from the lower half --
    // confirm against MacroAssembler::vinserti128h.
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// The pattern is computed by replicate4_imm / replicate8_imm with width 2.
// NOTE(review): Repl2S_imm and Repl4S_imm use ins_pipe(fpu_reg_reg) although
// they load from memory, while the byte variants use pipe_slow -- confirm
// whether the difference is intended.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// 64-bit load of the pattern, then punpcklqdq duplicates the low quadword.
instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t!
replicate16S($con)" %} 1956 ins_encode %{ 1957 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1958 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1959 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1960 %} 1961 ins_pipe( pipe_slow ); 1962 %} 1963 1964 // Replicate char/short (2 byte) scalar zero to be vector 1965 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 1966 predicate(n->as_Vector()->length() == 2); 1967 match(Set dst (ReplicateS zero)); 1968 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 1969 ins_encode %{ 1970 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1971 %} 1972 ins_pipe( fpu_reg_reg ); 1973 %} 1974 1975 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 1976 predicate(n->as_Vector()->length() == 4); 1977 match(Set dst (ReplicateS zero)); 1978 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 1979 ins_encode %{ 1980 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1981 %} 1982 ins_pipe( fpu_reg_reg ); 1983 %} 1984 1985 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 1986 predicate(n->as_Vector()->length() == 8); 1987 match(Set dst (ReplicateS zero)); 1988 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 1989 ins_encode %{ 1990 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1991 %} 1992 ins_pipe( fpu_reg_reg ); 1993 %} 1994 1995 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 1996 predicate(n->as_Vector()->length() == 16); 1997 match(Set dst (ReplicateS zero)); 1998 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 1999 ins_encode %{ 2000 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2001 bool vector256 = true; 2002 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2003 %} 2004 ins_pipe( fpu_reg_reg ); 2005 %} 2006 2007 // Replicate integer (4 byte) scalar to be vector 2008 instruct Repl2I(vecD dst, rRegI src) %{ 2009 predicate(n->as_Vector()->length() == 2); 2010 match(Set dst (ReplicateI src)); 2011 format %{ "movd $dst,$src\n\t" 2012 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2013 ins_encode %{ 2014 __ movdl($dst$$XMMRegister, $src$$Register); 2015 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2016 %} 2017 ins_pipe( fpu_reg_reg ); 2018 %} 2019 2020 instruct Repl4I(vecX dst, rRegI src) %{ 2021 predicate(n->as_Vector()->length() == 4); 2022 match(Set dst (ReplicateI src)); 2023 format %{ "movd $dst,$src\n\t" 2024 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2025 ins_encode %{ 2026 __ movdl($dst$$XMMRegister, $src$$Register); 2027 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2028 %} 2029 ins_pipe( pipe_slow ); 2030 %} 2031 2032 instruct Repl8I(vecY dst, rRegI src) %{ 2033 predicate(n->as_Vector()->length() == 8); 2034 match(Set dst (ReplicateI src)); 2035 format %{ "movd $dst,$src\n\t" 2036 "pshufd $dst,$dst,0x00\n\t" 2037 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2038 ins_encode %{ 2039 __ movdl($dst$$XMMRegister, $src$$Register); 2040 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2041 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2042 %} 2043 ins_pipe( pipe_slow ); 2044 %} 2045 2046 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2047 instruct Repl2I_imm(vecD dst, immI con) %{ 2048 predicate(n->as_Vector()->length() == 2); 2049 match(Set dst (ReplicateI con)); 2050 format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %} 2051 ins_encode %{ 2052 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2053 %} 2054 ins_pipe( fpu_reg_reg ); 2055 %} 2056 2057 instruct Repl4I_imm(vecX dst, immI con) %{ 2058 predicate(n->as_Vector()->length() == 4); 2059 match(Set dst (ReplicateI con)); 2060 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2061 "punpcklqdq $dst,$dst" %} 2062 ins_encode %{ 2063 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2064 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2065 %} 2066 ins_pipe( pipe_slow ); 2067 %} 2068 2069 instruct Repl8I_imm(vecY dst, immI con) %{ 2070 predicate(n->as_Vector()->length() == 8); 2071 match(Set dst (ReplicateI con)); 2072 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2073 "punpcklqdq $dst,$dst\n\t" 2074 "vinserti128h $dst,$dst,$dst" %} 2075 ins_encode %{ 2076 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2077 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2078 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2079 %} 2080 ins_pipe( pipe_slow ); 2081 %} 2082 2083 // Integer could be loaded into xmm register directly from memory. 2084 instruct Repl2I_mem(vecD dst, memory mem) %{ 2085 predicate(n->as_Vector()->length() == 2); 2086 match(Set dst (ReplicateI (LoadI mem))); 2087 format %{ "movd $dst,$mem\n\t" 2088 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2089 ins_encode %{ 2090 __ movdl($dst$$XMMRegister, $mem$$Address); 2091 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2092 %} 2093 ins_pipe( fpu_reg_reg ); 2094 %} 2095 2096 instruct Repl4I_mem(vecX dst, memory mem) %{ 2097 predicate(n->as_Vector()->length() == 4); 2098 match(Set dst (ReplicateI (LoadI mem))); 2099 format %{ "movd $dst,$mem\n\t" 2100 "pshufd $dst,$dst,0x00\t! 
replicate4I" %} 2101 ins_encode %{ 2102 __ movdl($dst$$XMMRegister, $mem$$Address); 2103 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2104 %} 2105 ins_pipe( pipe_slow ); 2106 %} 2107 2108 instruct Repl8I_mem(vecY dst, memory mem) %{ 2109 predicate(n->as_Vector()->length() == 8); 2110 match(Set dst (ReplicateI (LoadI mem))); 2111 format %{ "movd $dst,$mem\n\t" 2112 "pshufd $dst,$dst,0x00\n\t" 2113 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2114 ins_encode %{ 2115 __ movdl($dst$$XMMRegister, $mem$$Address); 2116 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2117 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2118 %} 2119 ins_pipe( pipe_slow ); 2120 %} 2121 2122 // Replicate integer (4 byte) scalar zero to be vector 2123 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 2124 predicate(n->as_Vector()->length() == 2); 2125 match(Set dst (ReplicateI zero)); 2126 format %{ "pxor $dst,$dst\t! replicate2I" %} 2127 ins_encode %{ 2128 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2129 %} 2130 ins_pipe( fpu_reg_reg ); 2131 %} 2132 2133 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 2134 predicate(n->as_Vector()->length() == 4); 2135 match(Set dst (ReplicateI zero)); 2136 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 2137 ins_encode %{ 2138 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2139 %} 2140 ins_pipe( fpu_reg_reg ); 2141 %} 2142 2143 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 2144 predicate(n->as_Vector()->length() == 8); 2145 match(Set dst (ReplicateI zero)); 2146 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 2147 ins_encode %{ 2148 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2149 bool vector256 = true; 2150 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2151 %} 2152 ins_pipe( fpu_reg_reg ); 2153 %} 2154 2155 // Replicate long (8 byte) scalar to be vector 2156 #ifdef _LP64 2157 instruct Repl2L(vecX dst, rRegL src) %{ 2158 predicate(n->as_Vector()->length() == 2); 2159 match(Set dst (ReplicateL src)); 2160 format %{ "movdq $dst,$src\n\t" 2161 "punpcklqdq $dst,$dst\t! replicate2L" %} 2162 ins_encode %{ 2163 __ movdq($dst$$XMMRegister, $src$$Register); 2164 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2165 %} 2166 ins_pipe( pipe_slow ); 2167 %} 2168 2169 instruct Repl4L(vecY dst, rRegL src) %{ 2170 predicate(n->as_Vector()->length() == 4); 2171 match(Set dst (ReplicateL src)); 2172 format %{ "movdq $dst,$src\n\t" 2173 "punpcklqdq $dst,$dst\n\t" 2174 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2175 ins_encode %{ 2176 __ movdq($dst$$XMMRegister, $src$$Register); 2177 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2178 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2179 %} 2180 ins_pipe( pipe_slow ); 2181 %} 2182 #else // _LP64 2183 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 2184 predicate(n->as_Vector()->length() == 2); 2185 match(Set dst (ReplicateL src)); 2186 effect(TEMP dst, USE src, TEMP tmp); 2187 format %{ "movdl $dst,$src.lo\n\t" 2188 "movdl $tmp,$src.hi\n\t" 2189 "punpckldq $dst,$tmp\n\t" 2190 "punpcklqdq $dst,$dst\t! 
replicate2L"%} 2191 ins_encode %{ 2192 __ movdl($dst$$XMMRegister, $src$$Register); 2193 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2194 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2195 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2196 %} 2197 ins_pipe( pipe_slow ); 2198 %} 2199 2200 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2201 predicate(n->as_Vector()->length() == 4); 2202 match(Set dst (ReplicateL src)); 2203 effect(TEMP dst, USE src, TEMP tmp); 2204 format %{ "movdl $dst,$src.lo\n\t" 2205 "movdl $tmp,$src.hi\n\t" 2206 "punpckldq $dst,$tmp\n\t" 2207 "punpcklqdq $dst,$dst\n\t" 2208 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2209 ins_encode %{ 2210 __ movdl($dst$$XMMRegister, $src$$Register); 2211 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2212 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2213 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2214 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2215 %} 2216 ins_pipe( pipe_slow ); 2217 %} 2218 #endif // _LP64 2219 2220 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2221 instruct Repl2L_imm(vecX dst, immL con) %{ 2222 predicate(n->as_Vector()->length() == 2); 2223 match(Set dst (ReplicateL con)); 2224 format %{ "movq $dst,[$constantaddress]\n\t" 2225 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 2226 ins_encode %{ 2227 __ movq($dst$$XMMRegister, $constantaddress($con)); 2228 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2229 %} 2230 ins_pipe( pipe_slow ); 2231 %} 2232 2233 instruct Repl4L_imm(vecY dst, immL con) %{ 2234 predicate(n->as_Vector()->length() == 4); 2235 match(Set dst (ReplicateL con)); 2236 format %{ "movq $dst,[$constantaddress]\n\t" 2237 "punpcklqdq $dst,$dst\n\t" 2238 "vinserti128h $dst,$dst,$dst\t! 
replicate4L($con)" %} 2239 ins_encode %{ 2240 __ movq($dst$$XMMRegister, $constantaddress($con)); 2241 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2242 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2243 %} 2244 ins_pipe( pipe_slow ); 2245 %} 2246 2247 // Long could be loaded into xmm register directly from memory. 2248 instruct Repl2L_mem(vecX dst, memory mem) %{ 2249 predicate(n->as_Vector()->length() == 2); 2250 match(Set dst (ReplicateL (LoadL mem))); 2251 format %{ "movq $dst,$mem\n\t" 2252 "punpcklqdq $dst,$dst\t! replicate2L" %} 2253 ins_encode %{ 2254 __ movq($dst$$XMMRegister, $mem$$Address); 2255 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2256 %} 2257 ins_pipe( pipe_slow ); 2258 %} 2259 2260 instruct Repl4L_mem(vecY dst, memory mem) %{ 2261 predicate(n->as_Vector()->length() == 4); 2262 match(Set dst (ReplicateL (LoadL mem))); 2263 format %{ "movq $dst,$mem\n\t" 2264 "punpcklqdq $dst,$dst\n\t" 2265 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2266 ins_encode %{ 2267 __ movq($dst$$XMMRegister, $mem$$Address); 2268 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2269 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2270 %} 2271 ins_pipe( pipe_slow ); 2272 %} 2273 2274 // Replicate long (8 byte) scalar zero to be vector 2275 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 2276 predicate(n->as_Vector()->length() == 2); 2277 match(Set dst (ReplicateL zero)); 2278 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 2279 ins_encode %{ 2280 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2281 %} 2282 ins_pipe( fpu_reg_reg ); 2283 %} 2284 2285 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 2286 predicate(n->as_Vector()->length() == 4); 2287 match(Set dst (ReplicateL zero)); 2288 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 2289 ins_encode %{ 2290 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2291 bool vector256 = true; 2292 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2293 %} 2294 ins_pipe( fpu_reg_reg ); 2295 %} 2296 2297 // Replicate float (4 byte) scalar to be vector 2298 instruct Repl2F(vecD dst, regF src) %{ 2299 predicate(n->as_Vector()->length() == 2); 2300 match(Set dst (ReplicateF src)); 2301 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 2302 ins_encode %{ 2303 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2304 %} 2305 ins_pipe( fpu_reg_reg ); 2306 %} 2307 2308 instruct Repl4F(vecX dst, regF src) %{ 2309 predicate(n->as_Vector()->length() == 4); 2310 match(Set dst (ReplicateF src)); 2311 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 2312 ins_encode %{ 2313 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2314 %} 2315 ins_pipe( pipe_slow ); 2316 %} 2317 2318 instruct Repl8F(vecY dst, regF src) %{ 2319 predicate(n->as_Vector()->length() == 8); 2320 match(Set dst (ReplicateF src)); 2321 format %{ "pshufd $dst,$src,0x00\n\t" 2322 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 2323 ins_encode %{ 2324 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2325 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2326 %} 2327 ins_pipe( pipe_slow ); 2328 %} 2329 2330 // Replicate float (4 byte) scalar zero to be vector 2331 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 2332 predicate(n->as_Vector()->length() == 2); 2333 match(Set dst (ReplicateF zero)); 2334 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 2335 ins_encode %{ 2336 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2337 %} 2338 ins_pipe( fpu_reg_reg ); 2339 %} 2340 2341 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 2342 predicate(n->as_Vector()->length() == 4); 2343 match(Set dst (ReplicateF zero)); 2344 format %{ "xorps $dst,$dst\t! 
replicate4F zero" %} 2345 ins_encode %{ 2346 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2347 %} 2348 ins_pipe( fpu_reg_reg ); 2349 %} 2350 2351 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 2352 predicate(n->as_Vector()->length() == 8); 2353 match(Set dst (ReplicateF zero)); 2354 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 2355 ins_encode %{ 2356 bool vector256 = true; 2357 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2358 %} 2359 ins_pipe( fpu_reg_reg ); 2360 %} 2361 2362 // Replicate double (8 bytes) scalar to be vector 2363 instruct Repl2D(vecX dst, regD src) %{ 2364 predicate(n->as_Vector()->length() == 2); 2365 match(Set dst (ReplicateD src)); 2366 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 2367 ins_encode %{ 2368 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2369 %} 2370 ins_pipe( pipe_slow ); 2371 %} 2372 2373 instruct Repl4D(vecY dst, regD src) %{ 2374 predicate(n->as_Vector()->length() == 4); 2375 match(Set dst (ReplicateD src)); 2376 format %{ "pshufd $dst,$src,0x44\n\t" 2377 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 2378 ins_encode %{ 2379 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2380 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2381 %} 2382 ins_pipe( pipe_slow ); 2383 %} 2384 2385 // Replicate double (8 byte) scalar zero to be vector 2386 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 2387 predicate(n->as_Vector()->length() == 2); 2388 match(Set dst (ReplicateD zero)); 2389 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 2390 ins_encode %{ 2391 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 2392 %} 2393 ins_pipe( fpu_reg_reg ); 2394 %} 2395 2396 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 2397 predicate(n->as_Vector()->length() == 4); 2398 match(Set dst (ReplicateD zero)); 2399 format %{ "vxorpd $dst,$dst,$dst,vect256\t! 
replicate4D zero" %} 2400 ins_encode %{ 2401 bool vector256 = true; 2402 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2403 %} 2404 ins_pipe( fpu_reg_reg ); 2405 %} 2406 2407 // ====================VECTOR ARITHMETIC======================================= 2408 2409 // --------------------------------- ADD -------------------------------------- 2410 2411 // Bytes vector add 2412 instruct vadd4B(vecS dst, vecS src) %{ 2413 predicate(n->as_Vector()->length() == 4); 2414 match(Set dst (AddVB dst src)); 2415 format %{ "paddb $dst,$src\t! add packed4B" %} 2416 ins_encode %{ 2417 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2418 %} 2419 ins_pipe( pipe_slow ); 2420 %} 2421 2422 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 2423 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2424 match(Set dst (AddVB src1 src2)); 2425 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 2426 ins_encode %{ 2427 bool vector256 = false; 2428 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2429 %} 2430 ins_pipe( pipe_slow ); 2431 %} 2432 2433 instruct vadd8B(vecD dst, vecD src) %{ 2434 predicate(n->as_Vector()->length() == 8); 2435 match(Set dst (AddVB dst src)); 2436 format %{ "paddb $dst,$src\t! add packed8B" %} 2437 ins_encode %{ 2438 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2439 %} 2440 ins_pipe( pipe_slow ); 2441 %} 2442 2443 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 2444 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2445 match(Set dst (AddVB src1 src2)); 2446 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 2447 ins_encode %{ 2448 bool vector256 = false; 2449 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2450 %} 2451 ins_pipe( pipe_slow ); 2452 %} 2453 2454 instruct vadd16B(vecX dst, vecX src) %{ 2455 predicate(n->as_Vector()->length() == 16); 2456 match(Set dst (AddVB dst src)); 2457 format %{ "paddb $dst,$src\t! 
add packed16B" %} 2458 ins_encode %{ 2459 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2460 %} 2461 ins_pipe( pipe_slow ); 2462 %} 2463 2464 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 2465 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2466 match(Set dst (AddVB src1 src2)); 2467 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 2468 ins_encode %{ 2469 bool vector256 = false; 2470 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2471 %} 2472 ins_pipe( pipe_slow ); 2473 %} 2474 2475 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 2476 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2477 match(Set dst (AddVB src (LoadVector mem))); 2478 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 2479 ins_encode %{ 2480 bool vector256 = false; 2481 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2482 %} 2483 ins_pipe( pipe_slow ); 2484 %} 2485 2486 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 2487 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2488 match(Set dst (AddVB src1 src2)); 2489 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 2490 ins_encode %{ 2491 bool vector256 = true; 2492 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2493 %} 2494 ins_pipe( pipe_slow ); 2495 %} 2496 2497 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 2498 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2499 match(Set dst (AddVB src (LoadVector mem))); 2500 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 2501 ins_encode %{ 2502 bool vector256 = true; 2503 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2504 %} 2505 ins_pipe( pipe_slow ); 2506 %} 2507 2508 // Shorts/Chars vector add 2509 instruct vadd2S(vecS dst, vecS src) %{ 2510 predicate(n->as_Vector()->length() == 2); 2511 match(Set dst (AddVS dst src)); 2512 format %{ "paddw $dst,$src\t! 
add packed2S" %} 2513 ins_encode %{ 2514 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2515 %} 2516 ins_pipe( pipe_slow ); 2517 %} 2518 2519 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 2520 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2521 match(Set dst (AddVS src1 src2)); 2522 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 2523 ins_encode %{ 2524 bool vector256 = false; 2525 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2526 %} 2527 ins_pipe( pipe_slow ); 2528 %} 2529 2530 instruct vadd4S(vecD dst, vecD src) %{ 2531 predicate(n->as_Vector()->length() == 4); 2532 match(Set dst (AddVS dst src)); 2533 format %{ "paddw $dst,$src\t! add packed4S" %} 2534 ins_encode %{ 2535 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2536 %} 2537 ins_pipe( pipe_slow ); 2538 %} 2539 2540 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 2541 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2542 match(Set dst (AddVS src1 src2)); 2543 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 2544 ins_encode %{ 2545 bool vector256 = false; 2546 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2547 %} 2548 ins_pipe( pipe_slow ); 2549 %} 2550 2551 instruct vadd8S(vecX dst, vecX src) %{ 2552 predicate(n->as_Vector()->length() == 8); 2553 match(Set dst (AddVS dst src)); 2554 format %{ "paddw $dst,$src\t! add packed8S" %} 2555 ins_encode %{ 2556 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2557 %} 2558 ins_pipe( pipe_slow ); 2559 %} 2560 2561 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 2562 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2563 match(Set dst (AddVS src1 src2)); 2564 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %} 2565 ins_encode %{ 2566 bool vector256 = false; 2567 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2568 %} 2569 ins_pipe( pipe_slow ); 2570 %} 2571 2572 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 2573 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2574 match(Set dst (AddVS src (LoadVector mem))); 2575 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 2576 ins_encode %{ 2577 bool vector256 = false; 2578 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2579 %} 2580 ins_pipe( pipe_slow ); 2581 %} 2582 2583 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 2584 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 2585 match(Set dst (AddVS src1 src2)); 2586 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 2587 ins_encode %{ 2588 bool vector256 = true; 2589 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2590 %} 2591 ins_pipe( pipe_slow ); 2592 %} 2593 2594 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 2595 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 2596 match(Set dst (AddVS src (LoadVector mem))); 2597 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 2598 ins_encode %{ 2599 bool vector256 = true; 2600 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2601 %} 2602 ins_pipe( pipe_slow ); 2603 %} 2604 2605 // Integers vector add 2606 instruct vadd2I(vecD dst, vecD src) %{ 2607 predicate(n->as_Vector()->length() == 2); 2608 match(Set dst (AddVI dst src)); 2609 format %{ "paddd $dst,$src\t! add packed2I" %} 2610 ins_encode %{ 2611 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 2612 %} 2613 ins_pipe( pipe_slow ); 2614 %} 2615 2616 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 2617 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2618 match(Set dst (AddVI src1 src2)); 2619 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed2I" %} 2620 ins_encode %{ 2621 bool vector256 = false; 2622 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2623 %} 2624 ins_pipe( pipe_slow ); 2625 %} 2626 2627 instruct vadd4I(vecX dst, vecX src) %{ 2628 predicate(n->as_Vector()->length() == 4); 2629 match(Set dst (AddVI dst src)); 2630 format %{ "paddd $dst,$src\t! add packed4I" %} 2631 ins_encode %{ 2632 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 2633 %} 2634 ins_pipe( pipe_slow ); 2635 %} 2636 2637 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 2638 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2639 match(Set dst (AddVI src1 src2)); 2640 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 2641 ins_encode %{ 2642 bool vector256 = false; 2643 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2644 %} 2645 ins_pipe( pipe_slow ); 2646 %} 2647 2648 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 2649 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2650 match(Set dst (AddVI src (LoadVector mem))); 2651 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 2652 ins_encode %{ 2653 bool vector256 = false; 2654 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2655 %} 2656 ins_pipe( pipe_slow ); 2657 %} 2658 2659 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 2660 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 2661 match(Set dst (AddVI src1 src2)); 2662 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 2663 ins_encode %{ 2664 bool vector256 = true; 2665 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2666 %} 2667 ins_pipe( pipe_slow ); 2668 %} 2669 2670 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 2671 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 2672 match(Set dst (AddVI src (LoadVector mem))); 2673 format %{ "vpaddd $dst,$src,$mem\t! 
add packed8I" %} 2674 ins_encode %{ 2675 bool vector256 = true; 2676 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2677 %} 2678 ins_pipe( pipe_slow ); 2679 %} 2680 2681 // Longs vector add 2682 instruct vadd2L(vecX dst, vecX src) %{ 2683 predicate(n->as_Vector()->length() == 2); 2684 match(Set dst (AddVL dst src)); 2685 format %{ "paddq $dst,$src\t! add packed2L" %} 2686 ins_encode %{ 2687 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 2688 %} 2689 ins_pipe( pipe_slow ); 2690 %} 2691 2692 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 2693 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2694 match(Set dst (AddVL src1 src2)); 2695 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 2696 ins_encode %{ 2697 bool vector256 = false; 2698 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2699 %} 2700 ins_pipe( pipe_slow ); 2701 %} 2702 2703 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 2704 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2705 match(Set dst (AddVL src (LoadVector mem))); 2706 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 2707 ins_encode %{ 2708 bool vector256 = false; 2709 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2710 %} 2711 ins_pipe( pipe_slow ); 2712 %} 2713 2714 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 2715 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 2716 match(Set dst (AddVL src1 src2)); 2717 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 2718 ins_encode %{ 2719 bool vector256 = true; 2720 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2721 %} 2722 ins_pipe( pipe_slow ); 2723 %} 2724 2725 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 2726 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 2727 match(Set dst (AddVL src (LoadVector mem))); 2728 format %{ "vpaddq $dst,$src,$mem\t! 
add packed4L" %} 2729 ins_encode %{ 2730 bool vector256 = true; 2731 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2732 %} 2733 ins_pipe( pipe_slow ); 2734 %} 2735 2736 // Floats vector add 2737 instruct vadd2F(vecD dst, vecD src) %{ 2738 predicate(n->as_Vector()->length() == 2); 2739 match(Set dst (AddVF dst src)); 2740 format %{ "addps $dst,$src\t! add packed2F" %} 2741 ins_encode %{ 2742 __ addps($dst$$XMMRegister, $src$$XMMRegister); 2743 %} 2744 ins_pipe( pipe_slow ); 2745 %} 2746 2747 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 2748 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2749 match(Set dst (AddVF src1 src2)); 2750 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 2751 ins_encode %{ 2752 bool vector256 = false; 2753 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2754 %} 2755 ins_pipe( pipe_slow ); 2756 %} 2757 2758 instruct vadd4F(vecX dst, vecX src) %{ 2759 predicate(n->as_Vector()->length() == 4); 2760 match(Set dst (AddVF dst src)); 2761 format %{ "addps $dst,$src\t! add packed4F" %} 2762 ins_encode %{ 2763 __ addps($dst$$XMMRegister, $src$$XMMRegister); 2764 %} 2765 ins_pipe( pipe_slow ); 2766 %} 2767 2768 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 2769 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2770 match(Set dst (AddVF src1 src2)); 2771 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 2772 ins_encode %{ 2773 bool vector256 = false; 2774 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2775 %} 2776 ins_pipe( pipe_slow ); 2777 %} 2778 2779 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 2780 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2781 match(Set dst (AddVF src (LoadVector mem))); 2782 format %{ "vaddps $dst,$src,$mem\t! 
add packed4F" %} 2783 ins_encode %{ 2784 bool vector256 = false; 2785 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2786 %} 2787 ins_pipe( pipe_slow ); 2788 %} 2789 2790 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 2791 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2792 match(Set dst (AddVF src1 src2)); 2793 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 2794 ins_encode %{ 2795 bool vector256 = true; 2796 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2797 %} 2798 ins_pipe( pipe_slow ); 2799 %} 2800 2801 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 2802 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2803 match(Set dst (AddVF src (LoadVector mem))); 2804 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 2805 ins_encode %{ 2806 bool vector256 = true; 2807 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2808 %} 2809 ins_pipe( pipe_slow ); 2810 %} 2811 2812 // Doubles vector add 2813 instruct vadd2D(vecX dst, vecX src) %{ 2814 predicate(n->as_Vector()->length() == 2); 2815 match(Set dst (AddVD dst src)); 2816 format %{ "addpd $dst,$src\t! add packed2D" %} 2817 ins_encode %{ 2818 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 2819 %} 2820 ins_pipe( pipe_slow ); 2821 %} 2822 2823 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 2824 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2825 match(Set dst (AddVD src1 src2)); 2826 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 2827 ins_encode %{ 2828 bool vector256 = false; 2829 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2830 %} 2831 ins_pipe( pipe_slow ); 2832 %} 2833 2834 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 2835 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2836 match(Set dst (AddVD src (LoadVector mem))); 2837 format %{ "vaddpd $dst,$src,$mem\t! 
add packed2D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 doubles, three-operand: dst = src1 + src2 (vaddpd, vector256 set); requires UseAVX > 0.
instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 4 doubles, three-operand with memory input: dst = src + LoadVector(mem) (vaddpd, vector256 set); requires UseAVX > 0.
instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub

// 4 bytes, in place: dst = dst - src (psubb).
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 4 bytes, three-operand: dst = src1 - src2 (vpsubb); requires UseAVX > 0.
instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 bytes, in place: dst = dst - src (psubb).
instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! 
sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 8 bytes, three-operand: dst = src1 - src2 (vpsubb); requires UseAVX > 0.
instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 16 bytes, in place: dst = dst - src (psubb).
instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 16 bytes, three-operand: dst = src1 - src2 (vpsubb); requires UseAVX > 0.
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 16 bytes, three-operand with memory input: dst = src - LoadVector(mem) (vpsubb); requires UseAVX > 0.
instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 32 bytes, three-operand: dst = src1 - src2 (vpsubb, vector256 set); requires UseAVX > 1.
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed32B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 32 bytes, three-operand with memory input: dst = src - LoadVector(mem) (vpsubb, vector256 set); requires UseAVX > 1.
instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Shorts/Chars vector sub

// 2 shorts/chars, in place: dst = dst - src (psubw).
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 shorts/chars, three-operand: dst = src1 - src2 (vpsubw); requires UseAVX > 0.
instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 shorts/chars, in place: dst = dst - src (psubw).
instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 4 shorts/chars, three-operand: dst = src1 - src2 (vpsubw); requires UseAVX > 0.
instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 shorts/chars, in place: dst = dst - src (psubw).
instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 8 shorts/chars, three-operand: dst = src1 - src2 (vpsubw); requires UseAVX > 0.
instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 shorts/chars, three-operand with memory input: dst = src - LoadVector(mem) (vpsubw); requires UseAVX > 0.
instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 16 shorts/chars, three-operand: dst = src1 - src2 (vpsubw, vector256 set); requires UseAVX > 1.
instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 16 shorts/chars, three-operand with memory input: dst = src - LoadVector(mem) (vpsubw); requires UseAVX > 1.
instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Integers vector sub

// 2 ints, in place: dst = dst - src (psubd).
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 ints, three-operand: dst = src1 - src2 (vpsubd); requires UseAVX > 0.
instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 ints, in place: dst = dst - src (psubd).
instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 4 ints, three-operand: dst = src1 - src2 (vpsubd); requires UseAVX > 0.
instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 ints, three-operand with memory input: dst = src - LoadVector(mem) (vpsubd); requires UseAVX > 0.
instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! 
sub packed4I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 ints, three-operand: dst = src1 - src2 (vpsubd, vector256 set); requires UseAVX > 1.
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 8 ints, three-operand with memory input: dst = src - LoadVector(mem) (vpsubd, vector256 set); requires UseAVX > 1.
instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Longs vector sub

// 2 longs, in place: dst = dst - src (psubq).
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 longs, three-operand: dst = src1 - src2 (vpsubq); requires UseAVX > 0.
instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 2 longs, three-operand with memory input: dst = src - LoadVector(mem) (vpsubq); requires UseAVX > 0.
instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! 
sub packed2L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 longs, three-operand: dst = src1 - src2 (vpsubq, vector256 set); requires UseAVX > 1.
instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 4 longs, three-operand with memory input: dst = src - LoadVector(mem) (vpsubq, vector256 set); requires UseAVX > 1.
instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Floats vector sub

// 2 floats, in place: dst = dst - src (subps).
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 floats, three-operand: dst = src1 - src2 (vsubps); requires UseAVX > 0.
instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, in place: dst = dst - src (subps).
instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! 
sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, three-operand: dst = src1 - src2 (vsubps); requires UseAVX > 0.
instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, three-operand with memory input: dst = src - LoadVector(mem) (vsubps); requires UseAVX > 0.
instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 floats, three-operand: dst = src1 - src2 (vsubps, vector256 set); requires UseAVX > 0.
instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 8 floats, three-operand with memory input: dst = src - LoadVector(mem) (vsubps, vector256 set); requires UseAVX > 0.
instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Doubles vector sub

// 2 doubles, in place: dst = dst - src (subpd).
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! 
sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 doubles, three-operand: dst = src1 - src2 (vsubpd); requires UseAVX > 0.
instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 2 doubles, three-operand with memory input: dst = src - LoadVector(mem) (vsubpd); requires UseAVX > 0.
instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 doubles, three-operand: dst = src1 - src2 (vsubpd, vector256 set); requires UseAVX > 0.
instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 4 doubles, three-operand with memory input: dst = src - LoadVector(mem) (vsubpd); requires UseAVX > 0.
instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! 
sub packed4D" %}
  ins_encode %{
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul

// 2 shorts/chars, in place: dst = dst * src (pmullw).
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 shorts/chars, three-operand: dst = src1 * src2 (vpmullw); requires UseAVX > 0.
instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 shorts/chars, in place: dst = dst * src (pmullw).
instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 4 shorts/chars, three-operand: dst = src1 * src2 (vpmullw); requires UseAVX > 0.
instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 shorts/chars, in place: dst = dst * src (pmullw).
instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! 
mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 8 shorts/chars, three-operand: dst = src1 * src2 (vpmullw); requires UseAVX > 0.
instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 shorts/chars, three-operand with memory input: dst = src * LoadVector(mem) (vpmullw); requires UseAVX > 0.
instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 16 shorts/chars, three-operand: dst = src1 * src2 (vpmullw, vector256 set); requires UseAVX > 1.
instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 16 shorts/chars, three-operand with memory input: dst = src * LoadVector(mem) (vpmullw, vector256 set); requires UseAVX > 1.
instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Integers vector mul (sse4_1)

// 2 ints, in place: dst = dst * src (pmulld); requires UseSSE > 3.
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! 
mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 ints, three-operand: dst = src1 * src2 (vpmulld); requires UseAVX > 0.
instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 ints, in place: dst = dst * src (pmulld); requires UseSSE > 3.
instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 4 ints, three-operand: dst = src1 * src2 (vpmulld); requires UseAVX > 0.
instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 ints, three-operand with memory input: dst = src * LoadVector(mem) (vpmulld); requires UseAVX > 0.
instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 ints, three-operand: dst = src1 * src2 (vpmulld, vector256 set); requires UseAVX > 1.
instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed8I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 8 ints, three-operand with memory input: dst = src * LoadVector(mem) (vpmulld, vector256 set); requires UseAVX > 1.
instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Floats vector mul

// 2 floats, in place: dst = dst * src (mulps).
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 floats, three-operand: dst = src1 * src2 (vmulps); requires UseAVX > 0.
instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, in place: dst = dst * src (mulps).
instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, three-operand: dst = src1 * src2 (vmulps); requires UseAVX > 0.
instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! 
mul packed4F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, three-operand with memory input: dst = src * LoadVector(mem) (vmulps); requires UseAVX > 0.
instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 floats, three-operand: dst = src1 * src2 (vmulps, vector256 set); requires UseAVX > 0.
instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 8 floats, three-operand with memory input: dst = src * LoadVector(mem) (vmulps, vector256 set); requires UseAVX > 0.
instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Doubles vector mul

// 2 doubles, in place: dst = dst * src (mulpd).
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 doubles, three-operand: dst = src1 * src2 (vmulpd); requires UseAVX > 0.
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! 
mul packed2D" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 2 doubles, three-operand with memory input: dst = src * LoadVector(mem) (vmulpd); requires UseAVX > 0.
instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 doubles, three-operand: dst = src1 * src2 (vmulpd, vector256 set); requires UseAVX > 0.
instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 4 doubles, three-operand with memory input: dst = src * LoadVector(mem) (vmulpd, vector256 set); requires UseAVX > 0.
instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div

// 2 floats, in place: dst = dst / src (divps).
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! 
div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 floats, three-operand: dst = src1 / src2 (vdivps); requires UseAVX > 0.
instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, in place: dst = dst / src (divps).
instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, three-operand: dst = src1 / src2 (vdivps); requires UseAVX > 0.
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 floats, three-operand with memory input: dst = src / LoadVector(mem) (vdivps); requires UseAVX > 0.
instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 8 floats, three-operand: dst = src1 / src2 (vdivps, vector256 set); requires UseAVX > 0.
instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 8 floats, three-operand with memory input: dst = src / LoadVector(mem) (vdivps, vector256 set); requires UseAVX > 0.
instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// Doubles vector div

// 2 doubles, in place: dst = dst / src (divpd).
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 doubles, three-operand: dst = src1 / src2 (vdivpd); requires UseAVX > 0.
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 2 doubles, three-operand with memory input: dst = src / LoadVector(mem) (vdivpd); requires UseAVX > 0.
instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// 4 doubles, three-operand: dst = src1 / src2 (vdivpd, vector256 set); requires UseAVX > 0.
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! 
div packed4D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// 4 doubles, three-operand with memory input: dst = src / LoadVector(mem) (vdivpd, vector256 set); requires UseAVX > 0.
instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift

// 2 shorts/chars, in place: dst = dst << shift, count in a register operand (psllw).
instruct vsll2S(vecS dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// 2 shorts/chars, in place: dst = dst << shift, count is an immI8 constant (psllw).
instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// 2 shorts/chars, three-operand: dst = src << shift, count in a register operand (vpsllw); requires UseAVX > 0.
instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} 3791 ins_encode %{ 3792 bool vector256 = false; 3793 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3794 %} 3795 ins_pipe( pipe_slow ); 3796 %} 3797 3798 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 3799 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3800 match(Set dst (LShiftVS src shift)); 3801 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 3802 ins_encode %{ 3803 bool vector256 = false; 3804 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3805 %} 3806 ins_pipe( pipe_slow ); 3807 %} 3808 3809 instruct vsll4S(vecD dst, regF shift) %{ 3810 predicate(n->as_Vector()->length() == 4); 3811 match(Set dst (LShiftVS dst shift)); 3812 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 3813 ins_encode %{ 3814 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3815 %} 3816 ins_pipe( pipe_slow ); 3817 %} 3818 3819 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 3820 predicate(n->as_Vector()->length() == 4); 3821 match(Set dst (LShiftVS dst shift)); 3822 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 3823 ins_encode %{ 3824 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3825 %} 3826 ins_pipe( pipe_slow ); 3827 %} 3828 3829 instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{ 3830 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3831 match(Set dst (LShiftVS src shift)); 3832 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 3833 ins_encode %{ 3834 bool vector256 = false; 3835 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3836 %} 3837 ins_pipe( pipe_slow ); 3838 %} 3839 3840 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 3841 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3842 match(Set dst (LShiftVS src shift)); 3843 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 3844 ins_encode %{ 3845 bool vector256 = false; 3846 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3847 %} 3848 ins_pipe( pipe_slow ); 3849 %} 3850 3851 instruct vsll8S(vecX dst, regF shift) %{ 3852 predicate(n->as_Vector()->length() == 8); 3853 match(Set dst (LShiftVS dst shift)); 3854 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 3855 ins_encode %{ 3856 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3857 %} 3858 ins_pipe( pipe_slow ); 3859 %} 3860 3861 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 3862 predicate(n->as_Vector()->length() == 8); 3863 match(Set dst (LShiftVS dst shift)); 3864 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 3865 ins_encode %{ 3866 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3867 %} 3868 ins_pipe( pipe_slow ); 3869 %} 3870 3871 instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{ 3872 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3873 match(Set dst (LShiftVS src shift)); 3874 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 3875 ins_encode %{ 3876 bool vector256 = false; 3877 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3878 %} 3879 ins_pipe( pipe_slow ); 3880 %} 3881 3882 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 3883 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3884 match(Set dst (LShiftVS src shift)); 3885 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 3886 ins_encode %{ 3887 bool vector256 = false; 3888 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3889 %} 3890 ins_pipe( pipe_slow ); 3891 %} 3892 3893 instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{ 3894 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3895 match(Set dst (LShiftVS src shift)); 3896 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 3897 ins_encode %{ 3898 bool vector256 = true; 3899 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3900 %} 3901 ins_pipe( pipe_slow ); 3902 %} 3903 3904 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 3905 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3906 match(Set dst (LShiftVS src shift)); 3907 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 3908 ins_encode %{ 3909 bool vector256 = true; 3910 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3911 %} 3912 ins_pipe( pipe_slow ); 3913 %} 3914 3915 // Integers vector left shift 3916 instruct vsll2I(vecD dst, regF shift) %{ 3917 predicate(n->as_Vector()->length() == 2); 3918 match(Set dst (LShiftVI dst shift)); 3919 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 3920 ins_encode %{ 3921 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 3922 %} 3923 ins_pipe( pipe_slow ); 3924 %} 3925 3926 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 3927 predicate(n->as_Vector()->length() == 2); 3928 match(Set dst (LShiftVI dst shift)); 3929 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 3930 ins_encode %{ 3931 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 3932 %} 3933 ins_pipe( pipe_slow ); 3934 %} 3935 3936 instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{ 3937 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3938 match(Set dst (LShiftVI src shift)); 3939 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 3940 ins_encode %{ 3941 bool vector256 = false; 3942 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3943 %} 3944 ins_pipe( pipe_slow ); 3945 %} 3946 3947 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 3948 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3949 match(Set dst (LShiftVI src shift)); 3950 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 3951 ins_encode %{ 3952 bool vector256 = false; 3953 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3954 %} 3955 ins_pipe( pipe_slow ); 3956 %} 3957 3958 instruct vsll4I(vecX dst, regF shift) %{ 3959 predicate(n->as_Vector()->length() == 4); 3960 match(Set dst (LShiftVI dst shift)); 3961 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 3962 ins_encode %{ 3963 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 3964 %} 3965 ins_pipe( pipe_slow ); 3966 %} 3967 3968 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 3969 predicate(n->as_Vector()->length() == 4); 3970 match(Set dst (LShiftVI dst shift)); 3971 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 3972 ins_encode %{ 3973 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 3974 %} 3975 ins_pipe( pipe_slow ); 3976 %} 3977 3978 instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{ 3979 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3980 match(Set dst (LShiftVI src shift)); 3981 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 3982 ins_encode %{ 3983 bool vector256 = false; 3984 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3985 %} 3986 ins_pipe( pipe_slow ); 3987 %} 3988 3989 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 3990 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3991 match(Set dst (LShiftVI src shift)); 3992 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 3993 ins_encode %{ 3994 bool vector256 = false; 3995 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3996 %} 3997 ins_pipe( pipe_slow ); 3998 %} 3999 4000 instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{ 4001 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4002 match(Set dst (LShiftVI src shift)); 4003 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed8I" %} 4004 ins_encode %{ 4005 bool vector256 = true; 4006 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4007 %} 4008 ins_pipe( pipe_slow ); 4009 %} 4010 4011 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4012 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4013 match(Set dst (LShiftVI src shift)); 4014 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4015 ins_encode %{ 4016 bool vector256 = true; 4017 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4018 %} 4019 ins_pipe( pipe_slow ); 4020 %} 4021 4022 // Longs vector left shift 4023 instruct vsll2L(vecX dst, regF shift) %{ 4024 predicate(n->as_Vector()->length() == 2); 4025 match(Set dst (LShiftVL dst shift)); 4026 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4027 ins_encode %{ 4028 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 4029 %} 4030 ins_pipe( pipe_slow ); 4031 %} 4032 4033 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 4034 predicate(n->as_Vector()->length() == 2); 4035 match(Set dst (LShiftVL dst shift)); 4036 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4037 ins_encode %{ 4038 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 4039 %} 4040 ins_pipe( pipe_slow ); 4041 %} 4042 4043 instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{ 4044 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4045 match(Set dst (LShiftVL src shift)); 4046 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 4047 ins_encode %{ 4048 bool vector256 = false; 4049 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4050 %} 4051 ins_pipe( pipe_slow ); 4052 %} 4053 4054 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4055 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4056 match(Set dst (LShiftVL src shift)); 4057 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed2L" %} 4058 ins_encode %{ 4059 bool vector256 = false; 4060 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4061 %} 4062 ins_pipe( pipe_slow ); 4063 %} 4064 4065 instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{ 4066 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4067 match(Set dst (LShiftVL src shift)); 4068 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4069 ins_encode %{ 4070 bool vector256 = true; 4071 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4072 %} 4073 ins_pipe( pipe_slow ); 4074 %} 4075 4076 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4077 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4078 match(Set dst (LShiftVL src shift)); 4079 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4080 ins_encode %{ 4081 bool vector256 = true; 4082 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4083 %} 4084 ins_pipe( pipe_slow ); 4085 %} 4086 4087 // ----------------------- LogicalRightShift ----------------------------------- 4088 4089 // Shorts/Chars vector logical right shift produces incorrect Java result 4090 // for negative data because java code convert short value into int with 4091 // sign extension before a shift. 4092 4093 // Integers vector logical right shift 4094 instruct vsrl2I(vecD dst, regF shift) %{ 4095 predicate(n->as_Vector()->length() == 2); 4096 match(Set dst (URShiftVI dst shift)); 4097 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4098 ins_encode %{ 4099 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4100 %} 4101 ins_pipe( pipe_slow ); 4102 %} 4103 4104 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 4105 predicate(n->as_Vector()->length() == 2); 4106 match(Set dst (URShiftVI dst shift)); 4107 format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %} 4108 ins_encode %{ 4109 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4110 %} 4111 ins_pipe( pipe_slow ); 4112 %} 4113 4114 instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{ 4115 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4116 match(Set dst (URShiftVI src shift)); 4117 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4118 ins_encode %{ 4119 bool vector256 = false; 4120 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4121 %} 4122 ins_pipe( pipe_slow ); 4123 %} 4124 4125 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4126 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4127 match(Set dst (URShiftVI src shift)); 4128 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4129 ins_encode %{ 4130 bool vector256 = false; 4131 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4132 %} 4133 ins_pipe( pipe_slow ); 4134 %} 4135 4136 instruct vsrl4I(vecX dst, regF shift) %{ 4137 predicate(n->as_Vector()->length() == 4); 4138 match(Set dst (URShiftVI dst shift)); 4139 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 4140 ins_encode %{ 4141 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4142 %} 4143 ins_pipe( pipe_slow ); 4144 %} 4145 4146 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 4147 predicate(n->as_Vector()->length() == 4); 4148 match(Set dst (URShiftVI dst shift)); 4149 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 4150 ins_encode %{ 4151 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4152 %} 4153 ins_pipe( pipe_slow ); 4154 %} 4155 4156 instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{ 4157 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4158 match(Set dst (URShiftVI src shift)); 4159 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed4I" %} 4160 ins_encode %{ 4161 bool vector256 = false; 4162 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4163 %} 4164 ins_pipe( pipe_slow ); 4165 %} 4166 4167 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4168 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4169 match(Set dst (URShiftVI src shift)); 4170 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 4171 ins_encode %{ 4172 bool vector256 = false; 4173 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4174 %} 4175 ins_pipe( pipe_slow ); 4176 %} 4177 4178 instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{ 4179 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4180 match(Set dst (URShiftVI src shift)); 4181 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 4182 ins_encode %{ 4183 bool vector256 = true; 4184 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4185 %} 4186 ins_pipe( pipe_slow ); 4187 %} 4188 4189 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4190 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4191 match(Set dst (URShiftVI src shift)); 4192 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 4193 ins_encode %{ 4194 bool vector256 = true; 4195 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4196 %} 4197 ins_pipe( pipe_slow ); 4198 %} 4199 4200 // Longs vector logical right shift 4201 instruct vsrl2L(vecX dst, regF shift) %{ 4202 predicate(n->as_Vector()->length() == 2); 4203 match(Set dst (URShiftVL dst shift)); 4204 format %{ "psrlq $dst,$shift\t! 
logical right shift packed2L" %} 4205 ins_encode %{ 4206 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 4207 %} 4208 ins_pipe( pipe_slow ); 4209 %} 4210 4211 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 4212 predicate(n->as_Vector()->length() == 2); 4213 match(Set dst (URShiftVL dst shift)); 4214 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 4215 ins_encode %{ 4216 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 4217 %} 4218 ins_pipe( pipe_slow ); 4219 %} 4220 4221 instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{ 4222 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4223 match(Set dst (URShiftVL src shift)); 4224 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 4225 ins_encode %{ 4226 bool vector256 = false; 4227 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4228 %} 4229 ins_pipe( pipe_slow ); 4230 %} 4231 4232 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4233 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4234 match(Set dst (URShiftVL src shift)); 4235 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 4236 ins_encode %{ 4237 bool vector256 = false; 4238 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4239 %} 4240 ins_pipe( pipe_slow ); 4241 %} 4242 4243 instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{ 4244 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4245 match(Set dst (URShiftVL src shift)); 4246 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 4247 ins_encode %{ 4248 bool vector256 = true; 4249 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4250 %} 4251 ins_pipe( pipe_slow ); 4252 %} 4253 4254 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4255 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4256 match(Set dst (URShiftVL src shift)); 4257 format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed4L" %} 4258 ins_encode %{ 4259 bool vector256 = true; 4260 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4261 %} 4262 ins_pipe( pipe_slow ); 4263 %} 4264 4265 // ------------------- ArithmeticRightShift ----------------------------------- 4266 4267 // Shorts/Chars vector arithmetic right shift 4268 instruct vsra2S(vecS dst, regF shift) %{ 4269 predicate(n->as_Vector()->length() == 2); 4270 match(Set dst (RShiftVS dst shift)); 4271 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 4272 ins_encode %{ 4273 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4274 %} 4275 ins_pipe( pipe_slow ); 4276 %} 4277 4278 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 4279 predicate(n->as_Vector()->length() == 2); 4280 match(Set dst (RShiftVS dst shift)); 4281 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 4282 ins_encode %{ 4283 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4284 %} 4285 ins_pipe( pipe_slow ); 4286 %} 4287 4288 instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{ 4289 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4290 match(Set dst (RShiftVS src shift)); 4291 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 4292 ins_encode %{ 4293 bool vector256 = false; 4294 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4295 %} 4296 ins_pipe( pipe_slow ); 4297 %} 4298 4299 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4300 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4301 match(Set dst (RShiftVS src shift)); 4302 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} 4303 ins_encode %{ 4304 bool vector256 = false; 4305 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4306 %} 4307 ins_pipe( pipe_slow ); 4308 %} 4309 4310 instruct vsra4S(vecD dst, regF shift) %{ 4311 predicate(n->as_Vector()->length() == 4); 4312 match(Set dst (RShiftVS dst shift)); 4313 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 4314 ins_encode %{ 4315 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4316 %} 4317 ins_pipe( pipe_slow ); 4318 %} 4319 4320 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 4321 predicate(n->as_Vector()->length() == 4); 4322 match(Set dst (RShiftVS dst shift)); 4323 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 4324 ins_encode %{ 4325 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4326 %} 4327 ins_pipe( pipe_slow ); 4328 %} 4329 4330 instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{ 4331 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4332 match(Set dst (RShiftVS src shift)); 4333 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 4334 ins_encode %{ 4335 bool vector256 = false; 4336 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4337 %} 4338 ins_pipe( pipe_slow ); 4339 %} 4340 4341 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4342 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4343 match(Set dst (RShiftVS src shift)); 4344 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 4345 ins_encode %{ 4346 bool vector256 = false; 4347 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4348 %} 4349 ins_pipe( pipe_slow ); 4350 %} 4351 4352 instruct vsra8S(vecX dst, regF shift) %{ 4353 predicate(n->as_Vector()->length() == 8); 4354 match(Set dst (RShiftVS dst shift)); 4355 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %} 4356 ins_encode %{ 4357 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4358 %} 4359 ins_pipe( pipe_slow ); 4360 %} 4361 4362 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 4363 predicate(n->as_Vector()->length() == 8); 4364 match(Set dst (RShiftVS dst shift)); 4365 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 4366 ins_encode %{ 4367 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4368 %} 4369 ins_pipe( pipe_slow ); 4370 %} 4371 4372 instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{ 4373 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4374 match(Set dst (RShiftVS src shift)); 4375 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 4376 ins_encode %{ 4377 bool vector256 = false; 4378 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4379 %} 4380 ins_pipe( pipe_slow ); 4381 %} 4382 4383 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4384 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4385 match(Set dst (RShiftVS src shift)); 4386 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 4387 ins_encode %{ 4388 bool vector256 = false; 4389 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4390 %} 4391 ins_pipe( pipe_slow ); 4392 %} 4393 4394 instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{ 4395 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4396 match(Set dst (RShiftVS src shift)); 4397 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 4398 ins_encode %{ 4399 bool vector256 = true; 4400 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4401 %} 4402 ins_pipe( pipe_slow ); 4403 %} 4404 4405 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4406 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4407 match(Set dst (RShiftVS src shift)); 4408 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} 4409 ins_encode %{ 4410 bool vector256 = true; 4411 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4412 %} 4413 ins_pipe( pipe_slow ); 4414 %} 4415 4416 // Integers vector arithmetic right shift 4417 instruct vsra2I(vecD dst, regF shift) %{ 4418 predicate(n->as_Vector()->length() == 2); 4419 match(Set dst (RShiftVI dst shift)); 4420 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 4421 ins_encode %{ 4422 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 4423 %} 4424 ins_pipe( pipe_slow ); 4425 %} 4426 4427 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 4428 predicate(n->as_Vector()->length() == 2); 4429 match(Set dst (RShiftVI dst shift)); 4430 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 4431 ins_encode %{ 4432 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 4433 %} 4434 ins_pipe( pipe_slow ); 4435 %} 4436 4437 instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{ 4438 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4439 match(Set dst (RShiftVI src shift)); 4440 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 4441 ins_encode %{ 4442 bool vector256 = false; 4443 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4444 %} 4445 ins_pipe( pipe_slow ); 4446 %} 4447 4448 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4449 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4450 match(Set dst (RShiftVI src shift)); 4451 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 4452 ins_encode %{ 4453 bool vector256 = false; 4454 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4455 %} 4456 ins_pipe( pipe_slow ); 4457 %} 4458 4459 instruct vsra4I(vecX dst, regF shift) %{ 4460 predicate(n->as_Vector()->length() == 4); 4461 match(Set dst (RShiftVI dst shift)); 4462 format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed4I" %} 4463 ins_encode %{ 4464 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 4465 %} 4466 ins_pipe( pipe_slow ); 4467 %} 4468 4469 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 4470 predicate(n->as_Vector()->length() == 4); 4471 match(Set dst (RShiftVI dst shift)); 4472 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 4473 ins_encode %{ 4474 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 4475 %} 4476 ins_pipe( pipe_slow ); 4477 %} 4478 4479 instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{ 4480 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4481 match(Set dst (RShiftVI src shift)); 4482 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 4483 ins_encode %{ 4484 bool vector256 = false; 4485 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4486 %} 4487 ins_pipe( pipe_slow ); 4488 %} 4489 4490 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4491 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4492 match(Set dst (RShiftVI src shift)); 4493 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 4494 ins_encode %{ 4495 bool vector256 = false; 4496 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4497 %} 4498 ins_pipe( pipe_slow ); 4499 %} 4500 4501 instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{ 4502 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4503 match(Set dst (RShiftVI src shift)); 4504 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 4505 ins_encode %{ 4506 bool vector256 = true; 4507 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4508 %} 4509 ins_pipe( pipe_slow ); 4510 %} 4511 4512 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4513 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4514 match(Set dst (RShiftVI src shift)); 4515 format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed8I" %} 4516 ins_encode %{ 4517 bool vector256 = true; 4518 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4519 %} 4520 ins_pipe( pipe_slow ); 4521 %} 4522 4523 // There are no longs vector arithmetic right shift instructions. 4524 4525 4526 // --------------------------------- AND -------------------------------------- 4527 4528 instruct vand4B(vecS dst, vecS src) %{ 4529 predicate(n->as_Vector()->length_in_bytes() == 4); 4530 match(Set dst (AndV dst src)); 4531 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 4532 ins_encode %{ 4533 __ pand($dst$$XMMRegister, $src$$XMMRegister); 4534 %} 4535 ins_pipe( pipe_slow ); 4536 %} 4537 4538 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 4539 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 4540 match(Set dst (AndV src1 src2)); 4541 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 4542 ins_encode %{ 4543 bool vector256 = false; 4544 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4545 %} 4546 ins_pipe( pipe_slow ); 4547 %} 4548 4549 instruct vand8B(vecD dst, vecD src) %{ 4550 predicate(n->as_Vector()->length_in_bytes() == 8); 4551 match(Set dst (AndV dst src)); 4552 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 4553 ins_encode %{ 4554 __ pand($dst$$XMMRegister, $src$$XMMRegister); 4555 %} 4556 ins_pipe( pipe_slow ); 4557 %} 4558 4559 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 4560 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 4561 match(Set dst (AndV src1 src2)); 4562 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (8 bytes)" %} 4563 ins_encode %{ 4564 bool vector256 = false; 4565 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4566 %} 4567 ins_pipe( pipe_slow ); 4568 %} 4569 4570 instruct vand16B(vecX dst, vecX src) %{ 4571 predicate(n->as_Vector()->length_in_bytes() == 16); 4572 match(Set dst (AndV dst src)); 4573 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 4574 ins_encode %{ 4575 __ pand($dst$$XMMRegister, $src$$XMMRegister); 4576 %} 4577 ins_pipe( pipe_slow ); 4578 %} 4579 4580 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 4581 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 4582 match(Set dst (AndV src1 src2)); 4583 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 4584 ins_encode %{ 4585 bool vector256 = false; 4586 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4587 %} 4588 ins_pipe( pipe_slow ); 4589 %} 4590 4591 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 4592 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 4593 match(Set dst (AndV src (LoadVector mem))); 4594 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 4595 ins_encode %{ 4596 bool vector256 = false; 4597 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4598 %} 4599 ins_pipe( pipe_slow ); 4600 %} 4601 4602 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 4603 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 4604 match(Set dst (AndV src1 src2)); 4605 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AND of a 32-byte vector register with a vector loaded from memory.
// Selected only when UseAVX > 1; emits the three-operand vpand with the
// second source taken from $mem.
instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

// OR of 4-byte vectors, two-operand form: dst is both an input and the result.
instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// OR of 4-byte vectors, three-operand form (dst = src1 | src2).
// Selected only when UseAVX > 0.
instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// OR of 8-byte vectors, two-operand form: dst is both an input and the result.
instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// OR of 8-byte vectors, three-operand form (dst = src1 | src2).
// Selected only when UseAVX > 0.
instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// OR of 16-byte vectors, two-operand form: dst is both an input and the result.
instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// OR of 16-byte vectors, three-operand form (dst = src1 | src2).
// Selected only when UseAVX > 0.
instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// OR of a 16-byte vector register with a vector loaded from memory.
// Selected only when UseAVX > 0; the second source is taken from $mem.
instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// OR of 32-byte vectors, three-operand form (dst = src1 | src2).
// Selected only when UseAVX > 1.
instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// OR of a 32-byte vector register with a vector loaded from memory.
// Selected only when UseAVX > 1; the second source is taken from $mem.
instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

// XOR of 4-byte vectors, two-operand form: dst is both an input and the result.
instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XOR of 4-byte vectors, three-operand form (dst = src1 ^ src2).
// Selected only when UseAVX > 0.
instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// XOR of 8-byte vectors, two-operand form: dst is both an input and the result.
instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XOR of 8-byte vectors, three-operand form (dst = src1 ^ src2).
// Selected only when UseAVX > 0.
instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// XOR of 16-byte vectors, two-operand form: dst is both an input and the result.
instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XOR of 16-byte vectors, three-operand form (dst = src1 ^ src2).
// Selected only when UseAVX > 0.
instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// XOR of a 16-byte vector register with a vector loaded from memory.
// Selected only when UseAVX > 0; the second source is taken from $mem.
instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// XOR of 32-byte vectors, three-operand form (dst = src1 ^ src2).
// Selected only when UseAVX > 1.
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// XOR of a 32-byte vector register with a vector loaded from memory.
// Selected only when UseAVX > 1; the second source is taken from $mem.
instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe( pipe_slow );
%}