//
// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS = No-Save: The register allocator assumes that these registers
//               can be used without saving upon entry to the method, &
//               that they do not need to be saved at call sites.
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS = Always-Save: The register allocator assumes that these registers
//                   must be saved before using them upon entry to the
//                   method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

// Layout of the definitions below: every XMM register is described as eight
// reg_def entries, one per 32-bit word (no suffix for word a, then b-h).
// All eight entries of a register share the same encoding number. Each
// reg_def here also carries a fifth argument: the VMReg expression for that
// word (the register's own VMReg for word a, ->next(1) .. ->next(7) for
// words b-h).

// XMM0-XMM5: SOC for both the register-allocator and the C convention save
// type, independent of platform.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));

// Win64 variant: XMM6-XMM15 are all defined, with C convention save type SOE
// (see the "Windows ABI" note at the top of this table).
#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

// Non-Win64 variant: XMM6 and XMM7 are SOC/SOC like XMM0-XMM5; XMM8-XMM15
// are only defined when _LP64 is set.
#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64

// Flags register. Its encoding is one past the highest XMM encoding defined
// for the configuration (16 with _LP64, 8 without). Ideal register type is 0
// and there is no backing VMReg (VMRegImpl::Bad()).
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation chunk listing every 32-bit word (a-h) of every XMM register;
// XMM8-XMM15 are listed only under _LP64.
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
// (word a of each XMM register only)
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
// (words a and b of each XMM register)
reg_class double_reg(XMM0, XMM0b,
                     XMM1, XMM1b,
                     XMM2, XMM2b,
                     XMM3, XMM3b,
                     XMM4, XMM4b,
                     XMM5, XMM5b,
                     XMM6, XMM6b,
                     XMM7, XMM7b
#ifdef _LP64
                    ,XMM8, XMM8b,
                     XMM9, XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
// (same member list as float_reg: word a only)
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
// (same member list as double_reg: words a and b)
reg_class vectord_reg(XMM0, XMM0b,
                      XMM1, XMM1b,
                      XMM2, XMM2b,
                      XMM3, XMM3b,
                      XMM4, XMM4b,
                      XMM5, XMM5b,
                      XMM6, XMM6b,
                      XMM7, XMM7b
#ifdef _LP64
                     ,XMM8, XMM8b,
                      XMM9, XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
// (words a-d of each XMM register)
reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
                      XMM1, XMM1b, XMM1c, XMM1d,
                      XMM2, XMM2b, XMM2c, XMM2d,
                      XMM3, XMM3b, XMM3c, XMM3d,
                      XMM4, XMM4b, XMM4c, XMM4d,
                      XMM5, XMM5b, XMM5c, XMM5d,
                      XMM6, XMM6b, XMM6c, XMM6d,
                      XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d,
                      XMM9, XMM9b, XMM9c, XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
// (all eight words a-h of each XMM register)
reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}

source %{
  // Float masks come from different places depending on platform.
// Accessors for the sign-mask / sign-flip constants. With _LP64 the addresses
// are obtained from StubRoutines::x86; otherwise they are the addresses of
// the *_pool symbols (declared outside this section).
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


// Reports whether the match rule for ideal opcode 'opcode' may be used with
// the current flags/CPU.
// Returns false when there is no match rule at all, or when:
//   - PopCountI/PopCountL is requested without UsePopCountInstruction;
//   - MulVI is requested with UseSSE < 4 and UseAVX < 1;
//   - CompareAndSwapL (and, under _LP64, CompareAndSwapP) is requested and
//     VM_Version::supports_cx8() is false.
// Every other opcode that has a match rule is reported as supported.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        return false;
    break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
    break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        return false;
    break;
  }

  return true;  // Per default match rules are supported.
}

// Max vector size in bytes. 0 if not supported.
// The width starts at 16 (or 32 when UseAVX > 1, or when UseAVX > 0 and the
// element type is T_FLOAT/T_DOUBLE), is clamped to MaxVectorSize, and is
// then rejected (0) if it cannot hold the minimum element count for 'bt'.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // The case groups below have no 'break' between them, so control falls
  // through from the wider element types into the narrower checks. Those
  // later checks are weaker (size >= 16 implies size >= 8 implies size >= 4),
  // so the fall-through does not change the result.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
// max: vector width in bytes divided by the element size of 'bt'.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
// min: 4 elements for 1-byte types, 2 elements otherwise, but never more
// than max_vector_size(bt) (so 0 when vectors of 'bt' are unsupported).
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
// Only 4, 8, 16 and 32 are accepted; any other size reaches
// ShouldNotReachHere().
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
  }
  ShouldNotReachHere();
  return 0;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// Helper methods for MachSpillCopyNode::implementation().

// Register-to-register vector move.
//   cbuf != NULL : emits the move into cbuf and returns the number of bytes
//                  actually emitted.
//   cbuf == NULL : in non-PRODUCT builds, prints the instruction to 'st'
//                  when do_size is false; in every case returns the fixed
//                  size 4.
// src_hi/dst_hi are only consulted by the adjacency assert below.
// ireg selects the vector kind (Op_VecS/VecD/VecX/VecY); any other value
// reaches ShouldNotReachHere().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    default:
      ShouldNotReachHere();
    }
    // Size is measured as the growth of the assembler offset.
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return 4;
}

// Vector spill (store) or reload (load) between an XMM register and the
// stack slot at [rsp + stack_offset].
//   cbuf != NULL : emits the instruction and returns the number of bytes
//                  actually emitted.
//   cbuf == NULL : in non-PRODUCT builds, prints the instruction to 'st'
//                  when do_size is false; in every case returns the
//                  predicted size 5 + offset_size.
// ireg selects the instruction: movdl (VecS), movq (VecD), movdqu (VecX),
// vmovdqu (VecY); any other value reaches ShouldNotReachHere().
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // 0 extra bytes for offset 0, 1 byte for offsets below 0x80, else 4
    // (presumably the size of the address displacement -- TODO confirm).
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    // NOTE: the VecS case prints the mnemonic "movd" while the emitting path
    // above calls movdl().
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  // Same size prediction as in the ASSERT branch above.
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}

// Replicates the low 'width' bytes (1 or 2) of 'con' across 32 bits and
// returns that bit pattern reinterpreted as a jfloat.
static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  // Double the filled width each pass: 8 -> 16 -> 32 (or 16 -> 32).
  // NOTE(review): 'val' is signed, and the last pass can shift a set bit into
  // the sign position -- confirm this is acceptable on all supported compilers.
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  // NOTE(review): reinterprets the integer bits through a pointer cast (type
  // punning) -- confirm the build tolerates this, e.g. strict aliasing disabled.
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

// Replicates the low 'width' bytes (1, 2 or 4) of 'con' across 64 bits and
// returns that bit pattern reinterpreted as a jdouble.
static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  // NOTE(review): same signed-shift caveat as in replicate4_imm.
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  // NOTE(review): same pointer-cast type punning caveat as in replicate4_imm.
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}

#ifndef PRODUCT
  // Debug listing for the padding node: prints the pad size _count.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emits the padding by passing _count to the assembler's nop().
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    __ nop(_count);
  }

  // Size of the padding node is exactly _count.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }

#ifndef PRODUCT
  // Debug listing for the breakpoint node.
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  // Emits a single int3; ra_ is not used.
  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
    MacroAssembler _masm(&cbuf);
    __ int3();
  }

  // Delegates size computation to the generic MachNode::size().
  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }

%}

encode %{

  // Saves RSP into rbp_mh_SP_save. In debug builds the emitted size is
  // checked against preserve_SP_size() (defined outside this section).
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Inverse of preserve_SP: reloads RSP from rbp_mh_SP_save.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // Emitted only when VerifyStackAtCalls is set: compares the stack word at
  // [rsp + framesize] with the cookie 0xbadb100d and executes int3 when it
  // does not match.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
// Vectors
// Register operands for vector values. Each one matches the ideal vector type
// named in its match rule and is allocated from the register class named in
// its constraint. The load/store rules further down use vecS, vecD, vecX and
// vecY for 4-, 8-, 16- and 32-byte vectors respectively.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// Halt nodes are encoded as a single int3.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// Scalar floating-point arithmetic. Every operation below comes in two groups
// selected by predicate:
//   * SSE forms (UseSSE >= 1 for float, >= 2 for double, and UseAVX == 0)
//     match (Op dst x) and overwrite dst;
//   * AVX forms (UseAVX > 0) match (Op src1 x) and write a separate dst.
// Within each group x is a register, a load from memory, or an immediate
// that is addressed through $constantaddress.

// AddF, SSE forms.
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AddF, AVX forms.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AddD, SSE forms (require UseSSE >= 2).
instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AddD, AVX forms.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// SubF, SSE forms.
instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// SubF, AVX forms (UseAVX > 0): three-operand, dst is separate from src1.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// SubD, SSE forms (UseSSE >= 2, UseAVX == 0): dst is updated in place.
instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// SubD, AVX forms.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// MulF, SSE forms.
instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// MulF, AVX forms.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// MulD, SSE forms.
instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// MulD, AVX forms.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// DivF, SSE forms.
instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// DivF, AVX forms.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// DivD, SSE forms.
instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// DivD with an immediate divisor (SSE form): the constant is addressed
// through $constantaddress.
instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// DivD, AVX forms (UseAVX > 0): three-operand, dst is separate from src1.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Abs and Neg are implemented as a bitwise AND / XOR of the value with a mask
// loaded from an external address (float_signmask(), double_signmask(),
// float_signflip(), double_signflip()); the format strings show the mask
// values. The SSE forms operate on dst in place; the AVX forms take a
// separate src and pass vector256 = false to the assembler.

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// Square root. There is no separate AVX group here: the predicates test only
// UseSSE. The float rules match the tree ConvD2F(SqrtD(ConvF2D x)) and encode
// it as a single sqrtss.
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double square root matches SqrtD directly.
instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
ins_cost(150); 1593 ins_encode %{ 1594 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1595 %} 1596 ins_pipe(pipe_slow); 1597 %} 1598 1599 instruct sqrtD_imm(regD dst, immD con) %{ 1600 predicate(UseSSE>=2); 1601 match(Set dst (SqrtD con)); 1602 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1603 ins_cost(150); 1604 ins_encode %{ 1605 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1606 %} 1607 ins_pipe(pipe_slow); 1608 %} 1609 1610 1611 // ====================VECTOR INSTRUCTIONS===================================== 1612 1613 // Load vectors (4 bytes long) 1614 instruct loadV4(vecS dst, memory mem) %{ 1615 predicate(n->as_LoadVector()->memory_size() == 4); 1616 match(Set dst (LoadVector mem)); 1617 ins_cost(125); 1618 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1619 ins_encode %{ 1620 __ movdl($dst$$XMMRegister, $mem$$Address); 1621 %} 1622 ins_pipe( pipe_slow ); 1623 %} 1624 1625 // Load vectors (8 bytes long) 1626 instruct loadV8(vecD dst, memory mem) %{ 1627 predicate(n->as_LoadVector()->memory_size() == 8); 1628 match(Set dst (LoadVector mem)); 1629 ins_cost(125); 1630 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 1631 ins_encode %{ 1632 __ movq($dst$$XMMRegister, $mem$$Address); 1633 %} 1634 ins_pipe( pipe_slow ); 1635 %} 1636 1637 // Load vectors (16 bytes long) 1638 instruct loadV16(vecX dst, memory mem) %{ 1639 predicate(n->as_LoadVector()->memory_size() == 16); 1640 match(Set dst (LoadVector mem)); 1641 ins_cost(125); 1642 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 1643 ins_encode %{ 1644 __ movdqu($dst$$XMMRegister, $mem$$Address); 1645 %} 1646 ins_pipe( pipe_slow ); 1647 %} 1648 1649 // Load vectors (32 bytes long) 1650 instruct loadV32(vecY dst, memory mem) %{ 1651 predicate(n->as_LoadVector()->memory_size() == 32); 1652 match(Set dst (LoadVector mem)); 1653 ins_cost(125); 1654 format %{ "vmovdqu $dst,$mem\t! 
load vector (32 bytes)" %} 1655 ins_encode %{ 1656 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 1657 %} 1658 ins_pipe( pipe_slow ); 1659 %} 1660 1661 // Store vectors 1662 instruct storeV4(memory mem, vecS src) %{ 1663 predicate(n->as_StoreVector()->memory_size() == 4); 1664 match(Set mem (StoreVector mem src)); 1665 ins_cost(145); 1666 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 1667 ins_encode %{ 1668 __ movdl($mem$$Address, $src$$XMMRegister); 1669 %} 1670 ins_pipe( pipe_slow ); 1671 %} 1672 1673 instruct storeV8(memory mem, vecD src) %{ 1674 predicate(n->as_StoreVector()->memory_size() == 8); 1675 match(Set mem (StoreVector mem src)); 1676 ins_cost(145); 1677 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1678 ins_encode %{ 1679 __ movq($mem$$Address, $src$$XMMRegister); 1680 %} 1681 ins_pipe( pipe_slow ); 1682 %} 1683 1684 instruct storeV16(memory mem, vecX src) %{ 1685 predicate(n->as_StoreVector()->memory_size() == 16); 1686 match(Set mem (StoreVector mem src)); 1687 ins_cost(145); 1688 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 1689 ins_encode %{ 1690 __ movdqu($mem$$Address, $src$$XMMRegister); 1691 %} 1692 ins_pipe( pipe_slow ); 1693 %} 1694 1695 instruct storeV32(memory mem, vecY src) %{ 1696 predicate(n->as_StoreVector()->memory_size() == 32); 1697 match(Set mem (StoreVector mem src)); 1698 ins_cost(145); 1699 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1700 ins_encode %{ 1701 __ vmovdqu($mem$$Address, $src$$XMMRegister); 1702 %} 1703 ins_pipe( pipe_slow ); 1704 %} 1705 1706 // Replicate byte scalar to be vector 1707 instruct Repl4B(vecS dst, rRegI src) %{ 1708 predicate(n->as_Vector()->length() == 4); 1709 match(Set dst (ReplicateB src)); 1710 format %{ "movd $dst,$src\n\t" 1711 "punpcklbw $dst,$dst\n\t" 1712 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 1713 ins_encode %{ 1714 __ movdl($dst$$XMMRegister, $src$$Register); 1715 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1716 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1717 %} 1718 ins_pipe( pipe_slow ); 1719 %} 1720 1721 instruct Repl8B(vecD dst, rRegI src) %{ 1722 predicate(n->as_Vector()->length() == 8); 1723 match(Set dst (ReplicateB src)); 1724 format %{ "movd $dst,$src\n\t" 1725 "punpcklbw $dst,$dst\n\t" 1726 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1727 ins_encode %{ 1728 __ movdl($dst$$XMMRegister, $src$$Register); 1729 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1730 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1731 %} 1732 ins_pipe( pipe_slow ); 1733 %} 1734 1735 instruct Repl16B(vecX dst, rRegI src) %{ 1736 predicate(n->as_Vector()->length() == 16); 1737 match(Set dst (ReplicateB src)); 1738 format %{ "movd $dst,$src\n\t" 1739 "punpcklbw $dst,$dst\n\t" 1740 "pshuflw $dst,$dst,0x00\n\t" 1741 "punpcklqdq $dst,$dst\t! replicate16B" %} 1742 ins_encode %{ 1743 __ movdl($dst$$XMMRegister, $src$$Register); 1744 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1745 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1746 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1747 %} 1748 ins_pipe( pipe_slow ); 1749 %} 1750 1751 instruct Repl32B(vecY dst, rRegI src) %{ 1752 predicate(n->as_Vector()->length() == 32); 1753 match(Set dst (ReplicateB src)); 1754 format %{ "movd $dst,$src\n\t" 1755 "punpcklbw $dst,$dst\n\t" 1756 "pshuflw $dst,$dst,0x00\n\t" 1757 "punpcklqdq $dst,$dst\n\t" 1758 "vinserti128h $dst,$dst,$dst\t! 
replicate32B" %} 1759 ins_encode %{ 1760 __ movdl($dst$$XMMRegister, $src$$Register); 1761 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1762 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1763 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1764 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1765 %} 1766 ins_pipe( pipe_slow ); 1767 %} 1768 1769 // Replicate byte scalar immediate to be vector by loading from const table. 1770 instruct Repl4B_imm(vecS dst, immI con) %{ 1771 predicate(n->as_Vector()->length() == 4); 1772 match(Set dst (ReplicateB con)); 1773 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1774 ins_encode %{ 1775 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1776 %} 1777 ins_pipe( pipe_slow ); 1778 %} 1779 1780 instruct Repl8B_imm(vecD dst, immI con) %{ 1781 predicate(n->as_Vector()->length() == 8); 1782 match(Set dst (ReplicateB con)); 1783 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 1784 ins_encode %{ 1785 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1786 %} 1787 ins_pipe( pipe_slow ); 1788 %} 1789 1790 instruct Repl16B_imm(vecX dst, immI con) %{ 1791 predicate(n->as_Vector()->length() == 16); 1792 match(Set dst (ReplicateB con)); 1793 format %{ "movq $dst,[$constantaddress]\n\t" 1794 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 1795 ins_encode %{ 1796 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1797 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1798 %} 1799 ins_pipe( pipe_slow ); 1800 %} 1801 1802 instruct Repl32B_imm(vecY dst, immI con) %{ 1803 predicate(n->as_Vector()->length() == 32); 1804 match(Set dst (ReplicateB con)); 1805 format %{ "movq $dst,[$constantaddress]\n\t" 1806 "punpcklqdq $dst,$dst\n\t" 1807 "vinserti128h $dst,$dst,$dst\t! 
lreplicate32B($con)" %} 1808 ins_encode %{ 1809 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1810 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1811 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1812 %} 1813 ins_pipe( pipe_slow ); 1814 %} 1815 1816 // Replicate byte scalar zero to be vector 1817 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 1818 predicate(n->as_Vector()->length() == 4); 1819 match(Set dst (ReplicateB zero)); 1820 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 1821 ins_encode %{ 1822 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1823 %} 1824 ins_pipe( fpu_reg_reg ); 1825 %} 1826 1827 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 1828 predicate(n->as_Vector()->length() == 8); 1829 match(Set dst (ReplicateB zero)); 1830 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 1831 ins_encode %{ 1832 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1833 %} 1834 ins_pipe( fpu_reg_reg ); 1835 %} 1836 1837 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1838 predicate(n->as_Vector()->length() == 16); 1839 match(Set dst (ReplicateB zero)); 1840 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 1841 ins_encode %{ 1842 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1843 %} 1844 ins_pipe( fpu_reg_reg ); 1845 %} 1846 1847 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1848 predicate(n->as_Vector()->length() == 32); 1849 match(Set dst (ReplicateB zero)); 1850 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 1851 ins_encode %{ 1852 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
1853 bool vector256 = true; 1854 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1855 %} 1856 ins_pipe( fpu_reg_reg ); 1857 %} 1858 1859 // Replicate char/short (2 byte) scalar to be vector 1860 instruct Repl2S(vecS dst, rRegI src) %{ 1861 predicate(n->as_Vector()->length() == 2); 1862 match(Set dst (ReplicateS src)); 1863 format %{ "movd $dst,$src\n\t" 1864 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1865 ins_encode %{ 1866 __ movdl($dst$$XMMRegister, $src$$Register); 1867 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1868 %} 1869 ins_pipe( fpu_reg_reg ); 1870 %} 1871 1872 instruct Repl4S(vecD dst, rRegI src) %{ 1873 predicate(n->as_Vector()->length() == 4); 1874 match(Set dst (ReplicateS src)); 1875 format %{ "movd $dst,$src\n\t" 1876 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 1877 ins_encode %{ 1878 __ movdl($dst$$XMMRegister, $src$$Register); 1879 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1880 %} 1881 ins_pipe( fpu_reg_reg ); 1882 %} 1883 1884 instruct Repl8S(vecX dst, rRegI src) %{ 1885 predicate(n->as_Vector()->length() == 8); 1886 match(Set dst (ReplicateS src)); 1887 format %{ "movd $dst,$src\n\t" 1888 "pshuflw $dst,$dst,0x00\n\t" 1889 "punpcklqdq $dst,$dst\t! replicate8S" %} 1890 ins_encode %{ 1891 __ movdl($dst$$XMMRegister, $src$$Register); 1892 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1893 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1894 %} 1895 ins_pipe( pipe_slow ); 1896 %} 1897 1898 instruct Repl16S(vecY dst, rRegI src) %{ 1899 predicate(n->as_Vector()->length() == 16); 1900 match(Set dst (ReplicateS src)); 1901 format %{ "movd $dst,$src\n\t" 1902 "pshuflw $dst,$dst,0x00\n\t" 1903 "punpcklqdq $dst,$dst\n\t" 1904 "vinserti128h $dst,$dst,$dst\t! 
replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    // NOTE(review): with all operands equal this presumably copies the low
    // 128-bit half into the high half; the helper is defined outside this chunk.
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// The second argument of replicate4_imm/replicate8_imm is 2 here, matching the
// 2-byte element size. NOTE(review): the helpers are defined outside this
// chunk; presumably they return the 4-/8-byte replicated pattern -- confirm.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4 shorts: a single 8-byte load from the constant table.
instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// 8 shorts: 8-byte load, then duplicate the low quadword into the high quadword.
instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts: as Repl8S_imm, followed by the extension to 256 bits.
instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! 
replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar zero to be vector
// Zero is produced by XOR-ing dst with itself; the rules differ only in the
// vector length checked by the predicate and in the operand class.
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4-short variant.
instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 8-short variant.
instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 16-short variant.
instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // AVX does not have vpxor for 256-bit (AVX2 will have it), so vxorpd is the AVX fallback.
    // NOTE(review): the call that follows is vpxor, not vxorpd -- presumably the
    // assembler wrapper substitutes vxorpd when AVX2 is unavailable; confirm.
bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
// movdl moves the general register into the XMM register; pshufd with
// selector 0x00 then copies doubleword 0 into all four doublewords.
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Same sequence as Repl2I for a 4-element vector.
instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints: build the 128-bit pattern, then extend it to 256 bits.
instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    // NOTE(review): with all operands equal this presumably copies the low
    // 128-bit half into the high half; the helper is defined outside this chunk.
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %}
  ins_encode %{
    // The second argument (4) matches the 4-byte element size.
    // NOTE(review): replicate8_imm is defined outside this chunk; presumably
    // it returns the 8-byte replicated pattern -- confirm.
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4 ints: 8-byte constant load, then duplicate the low quadword into the high one.
instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints: as Repl4I_imm, followed by the extension to 256 bits.
instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Integer could be loaded into xmm register directly from memory.
// These rules match the LoadI together with the ReplicateI, so the value is
// read straight from $mem into the XMM register without a general register.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4-int variant of Repl2I_mem.
instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! 
replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints from memory: load and broadcast within 128 bits, then extend to 256 bits.
instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    // NOTE(review): with all operands equal this presumably copies the low
    // 128-bit half into the high half; the helper is defined outside this chunk.
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
// Zero is produced by XOR-ing dst with itself. The format strings follow the
// "replicateNI zero" wording used by the other *_zero rules.
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4-int variant.
instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 8-int variant.
instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // AVX does not have vpxor for 256-bit (AVX2 will have it), so vxorpd is the AVX fallback.
    // NOTE(review): the call that follows is vpxor, not vxorpd -- presumably the
    // assembler wrapper substitutes vxorpd when AVX2 is unavailable; confirm.
bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
// 64-bit build: the long sits in one general register, so a single movdq
// moves it into the XMM register before the low quadword is duplicated.
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs: as Repl2L, followed by the extension to 256 bits.
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    // NOTE(review): with all operands equal this presumably copies the low
    // 128-bit half into the high half; the helper is defined outside this chunk.
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// 32-bit build: the long is moved as two 32-bit halves (see the $src.lo and
// $src.hi lines of the format). The halves go into dst and tmp and are
// interleaved with punpckldq to rebuild the 64-bit value in dst.
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! 
replicate2L"%}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    // HIGH_FROM_LOW: presumably maps the low-half register to the register
    // holding the high half (macro defined outside this chunk).
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32-bit build, 4 longs: as the 2-long rule above, then extended to 256 bits.
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// The 8-byte constant is placed in the constant table as-is ($constantaddress($con)).
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs: as Repl2L_imm, followed by the extension to 256 bits.
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! 
replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
// These rules match the LoadL together with the ReplicateL, so the value is
// read straight from $mem with movq.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs from memory: as Repl2L_mem, followed by the extension to 256 bits.
instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
// Zero is produced by XOR-ing dst with itself.
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4-long variant.
instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // AVX does not have vpxor for 256-bit (AVX2 will have it), so vxorpd is the AVX fallback.
    // NOTE(review): the call that follows is vpxor, not vxorpd -- presumably the
    // assembler wrapper substitutes vxorpd when AVX2 is unavailable; confirm.
bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
// The float is already in an XMM register, so one pshufd from $src with
// selector 0x00 copies doubleword 0 into every doubleword of $dst. The format
// strings name $src as the shuffle source, matching what the encoding emits.
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4-float variant.
instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats: broadcast within 128 bits, then extend to 256 bits.
instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    // NOTE(review): with all operands equal this presumably copies the low
    // 128-bit half into the high half; the helper is defined outside this chunk.
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
// Zero is produced by XOR-ing dst with itself (xorps).
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4-float variant.
instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! 
replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 8 floats of zero: three-operand vxorps of dst with itself, with the
// vector256 flag set for the 256-bit register.
instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector
// Selector 0x44 picks doublewords 0,1,0,1, i.e. the low 64-bit double of $src
// is written to both halves of $dst.
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles: as Repl2D, followed by the extension to 256 bits.
instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    // NOTE(review): with all operands equal this presumably copies the low
    // 128-bit half into the high half; the helper is defined outside this chunk.
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
// Zero is produced by XOR-ing dst with itself (xorpd).
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4 doubles of zero. The format lists only the three register operands, like
// the other 256-bit rules in this section.
instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst\t! 
replicate4D zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
// Naming used throughout this section:
//   vaddN<T>      two-operand SSE form; dst is also the first input
//   vaddN<T>_reg  three-operand form, only matched when UseAVX > 0
//   vaddN<T>_mem  three-operand form whose second input is a LoadVector from memory
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 4 bytes.
instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    // NOTE(review): false presumably selects the 128-bit encoding -- confirm.
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Two-operand form for 8 bytes.
instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 8 bytes.
instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Two-operand form for 16 bytes.
instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! 
add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 16 bytes (UseAVX > 0).
instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    // NOTE(review): false presumably selects the 128-bit encoding -- confirm.
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes with the second input taken from memory: the LoadVector is matched
// into the rule and its address is passed to vpaddb.
instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes in a 256-bit register; this integer form requires UseAVX > 1.
instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes with the second input taken from memory (UseAVX > 1).
instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
// Two-operand form for 2 shorts; dst is also the first input.
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! 
add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 2 shorts (UseAVX > 0).
instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    // NOTE(review): false presumably selects the 128-bit encoding -- confirm.
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Two-operand form for 4 shorts; dst is also the first input.
instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 4 shorts (UseAVX > 0).
instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Two-operand form for 8 shorts; dst is also the first input.
instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 8 shorts (UseAVX > 0).
instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts with the second input taken from memory: the LoadVector is matched
// into the rule and its address is passed to vpaddw.
instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    // NOTE(review): false presumably selects the 128-bit encoding -- confirm.
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts in a 256-bit register; this integer form requires UseAVX > 1.
instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts with the second input taken from memory (UseAVX > 1).
instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
// Two-operand form for 2 ints; dst is also the first input.
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 2 ints (UseAVX > 0).
instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! 
add packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Two-operand form for 4 ints; dst is also the first input.
instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 4 ints (UseAVX > 0).
instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    // NOTE(review): false presumably selects the 128-bit encoding -- confirm.
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints with the second input taken from memory: the LoadVector is matched
// into the rule and its address is passed to vpaddd.
instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints in a 256-bit register; this integer form requires UseAVX > 1.
instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints with the second input taken from memory (UseAVX > 1).
instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! 
add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
// Two-operand form for 2 longs; dst is also the first input.
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 2 longs (UseAVX > 0).
instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    // NOTE(review): false presumably selects the 128-bit encoding -- confirm.
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs with the second input taken from memory: the LoadVector is matched
// into the rule and its address is passed to vpaddq.
instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs in a 256-bit register; this integer form requires UseAVX > 1.
instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs with the second input taken from memory (UseAVX > 1).
instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! 
add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
// Two-operand form for 2 floats; dst is also the first input.
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 2 floats (UseAVX > 0).
instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    // NOTE(review): false presumably selects the 128-bit encoding -- confirm.
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Two-operand form for 4 floats; dst is also the first input.
instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 4 floats (UseAVX > 0).
instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats with the second input taken from memory (UseAVX > 0).
instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! 
add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats in a 256-bit register. Unlike the 256-bit integer adds, the
// predicate here only requires UseAVX > 0.
instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats with the second input taken from memory (UseAVX > 0).
instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
// Two-operand form for 2 doubles; dst is also the first input.
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand form for 2 doubles (UseAVX > 0).
instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    // NOTE(review): false presumably selects the 128-bit encoding -- confirm.
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles with the second input taken from memory (UseAVX > 0).
instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! 
add packed2D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, three-operand form (UseAVX > 0): dst = src1 + src2.
instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles (UseAVX > 0), second operand loaded from memory: dst = src + [mem].
instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub

// 4 bytes, two-operand form: dst = dst - src.
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 bytes, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 bytes, two-operand form: dst = dst - src.
instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! 
sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 bytes, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes, two-operand form: dst = dst - src.
instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes (UseAVX > 0), second operand loaded from memory: dst = src - [mem].
instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes, three-operand form (UseAVX > 1): dst = src1 - src2.
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed32B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes (UseAVX > 1), second operand loaded from memory: dst = src - [mem].
instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub

// 2 shorts, two-operand form: dst = dst - src.
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, two-operand form: dst = dst - src.
instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, two-operand form: dst = dst - src.
instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts (UseAVX > 0), second operand loaded from memory: dst = src - [mem].
instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts, three-operand form (UseAVX > 1): dst = src1 - src2.
instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts (UseAVX > 1), second operand loaded from memory: dst = src - [mem].
instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub

// 2 ints, two-operand form: dst = dst - src.
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, two-operand form: dst = dst - src.
instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints (UseAVX > 0), second operand loaded from memory: dst = src - [mem].
instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! 
sub packed4I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints, three-operand form (UseAVX > 1): dst = src1 - src2.
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (UseAVX > 1), second operand loaded from memory: dst = src - [mem].
instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub

// 2 longs, two-operand form: dst = dst - src.
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs (UseAVX > 0), second operand loaded from memory: dst = src - [mem].
instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! 
sub packed2L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs, three-operand form (UseAVX > 1): dst = src1 - src2.
instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (UseAVX > 1), second operand loaded from memory: dst = src - [mem].
instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub

// 2 floats, two-operand form: dst = dst - src.
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 floats, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, two-operand form: dst = dst - src.
instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! 
sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats (UseAVX > 0), second operand loaded from memory: dst = src - [mem].
instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats (UseAVX > 0), second operand loaded from memory: dst = src - [mem].
instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub

// 2 doubles, two-operand form: dst = dst - src.
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! 
sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles (UseAVX > 0), second operand loaded from memory: dst = src - [mem].
instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, three-operand form (UseAVX > 0): dst = src1 - src2.
instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles (UseAVX > 0), second operand loaded from memory: dst = src - [mem].
instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! 
sub packed4D" %}
  ins_encode %{
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul

// 2 shorts, two-operand form: dst = dst * src.
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, two-operand form: dst = dst * src.
instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, two-operand form: dst = dst * src.
instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! 
mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts (UseAVX > 0), second operand loaded from memory: dst = src * [mem].
instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts, three-operand form (UseAVX > 1): dst = src1 * src2.
instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts (UseAVX > 1), second operand loaded from memory: dst = src * [mem].
instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)

// 2 ints, two-operand form (UseSSE > 3): dst = dst * src.
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! 
mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, two-operand form (UseSSE > 3): dst = dst * src.
instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints (UseAVX > 0), second operand loaded from memory: dst = src * [mem].
instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints, three-operand form (UseAVX > 1): dst = src1 * src2.
instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed8I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (UseAVX > 1), second operand loaded from memory: dst = src * [mem].
instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul

// 2 floats, two-operand form: dst = dst * src.
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 floats, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, two-operand form: dst = dst * src.
instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! 
mul packed4F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats (UseAVX > 0), second operand loaded from memory: dst = src * [mem].
instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats (UseAVX > 0), second operand loaded from memory: dst = src * [mem].
instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul

// 2 doubles, two-operand form: dst = dst * src.
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! 
mul packed2D" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles (UseAVX > 0), second operand loaded from memory: dst = src * [mem].
instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, three-operand form (UseAVX > 0): dst = src1 * src2.
instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles (UseAVX > 0), second operand loaded from memory: dst = src * [mem].
instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div

// 2 floats, two-operand form: dst = dst / src.
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! 
div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 floats, three-operand form (UseAVX > 0): dst = src1 / src2.
instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, two-operand form: dst = dst / src.
instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, three-operand form (UseAVX > 0): dst = src1 / src2.
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats (UseAVX > 0), second operand loaded from memory: dst = src / [mem].
instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats, three-operand form (UseAVX > 0): dst = src1 / src2.
instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats (UseAVX > 0), second operand loaded from memory: dst = src / [mem].
instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div

// 2 doubles, two-operand form: dst = dst / src.
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, three-operand form (UseAVX > 0): dst = src1 / src2.
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles (UseAVX > 0), second operand loaded from memory: dst = src / [mem].
instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ false);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, three-operand form (UseAVX > 0): dst = src1 / src2.
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! 
div packed4D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles (UseAVX > 0), second operand loaded from memory: dst = src / [mem].
instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /* vector256 */ true);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift

// 2 shorts, two-operand form, shift count in a regF operand: dst = dst << shift.
instruct vsll2S(vecS dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, two-operand form, immI8 constant shift count: dst = dst << shift.
instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts (UseAVX > 0), shift count in a regF operand: dst = src << shift.
instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecS): dst = src shifted left by an immI8 count, 2 shorts.
instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst shifted left by an XMM-register count, 4 shorts.
instruct vsll4S(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst shifted left by an immI8 count, 4 shorts.
instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src shifted left by an XMM-register count, 4 shorts.
instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src shifted left by an immI8 count, 4 shorts.
instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t!
left shift packed4S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst shifted left by an XMM-register count, 8 shorts.
instruct vsll8S(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst shifted left by an immI8 count, 8 shorts.
instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src shifted left by an XMM-register count, 8 shorts.
instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src shifted left by an immI8 count, 8 shorts.
instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src shifted left by an XMM-register count, 16 shorts.
instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t!
left shift packed16S" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src shifted left by an immI8 count, 16 shorts.
instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// Integers vector left shift

// In-place form (vecD): dst shifted left by an XMM-register count, 2 ints.
instruct vsll2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst shifted left by an immI8 count, 2 ints.
instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src shifted left by an XMM-register count, 2 ints.
instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src shifted left by an immI8 count, 2 ints.
instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t!
left shift packed2I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst shifted left by an XMM-register count, 4 ints.
instruct vsll4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst shifted left by an immI8 count, 4 ints.
instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src shifted left by an XMM-register count, 4 ints.
instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src shifted left by an immI8 count, 4 ints.
instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src shifted left by an XMM-register count, 8 ints.
instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t!
left shift packed8I" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src shifted left by an immI8 count, 8 ints.
instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// Longs vector left shift

// In-place form (vecX): dst shifted left by an XMM-register count, 2 longs.
instruct vsll2L(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst shifted left by an immI8 count, 2 longs.
instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src shifted left by an XMM-register count, 2 longs.
instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src shifted left by an immI8 count, 2 longs.
instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t!
left shift packed2L" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src shifted left by an XMM-register count, 4 longs.
instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src shifted left by an immI8 count, 4 longs.
instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// ----------------------- LogicalRightShift -----------------------------------

// A Shorts/Chars vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int with
// sign extension before a shift.

// Integers vector logical right shift

// In-place form (vecD): dst logically shifted right by an XMM-register count, 2 ints.
instruct vsrl2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst logically shifted right by an immI8 count, 2 ints.
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t!
logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src logically shifted right by an XMM-register count, 2 ints.
instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src logically shifted right by an immI8 count, 2 ints.
instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst logically shifted right by an XMM-register count, 4 ints.
instruct vsrl4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst logically shifted right by an immI8 count, 4 ints.
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src logically shifted right by an XMM-register count, 4 ints.
instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t!
logical right shift packed4I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src logically shifted right by an immI8 count, 4 ints.
instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src logically shifted right by an XMM-register count, 8 ints.
instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src logically shifted right by an immI8 count, 8 ints.
instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// Longs vector logical right shift

// In-place form (vecX): dst logically shifted right by an XMM-register count, 2 longs.
instruct vsrl2L(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t!
logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst logically shifted right by an immI8 count, 2 longs.
instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src logically shifted right by an XMM-register count, 2 longs.
instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src logically shifted right by an immI8 count, 2 longs.
instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src logically shifted right by an XMM-register count, 4 longs.
instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src logically shifted right by an immI8 count, 4 longs.
instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t!
logical right shift packed4L" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift

// In-place form (vecS): dst arithmetically shifted right by an XMM-register count, 2 shorts.
instruct vsra2S(vecS dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecS): dst arithmetically shifted right by an immI8 count, 2 shorts.
instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecS): dst = src arithmetically shifted right by an XMM-register count, 2 shorts.
instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecS): dst = src arithmetically shifted right by an immI8 count, 2 shorts.
instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t!
arithmetic right shift packed2S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst arithmetically shifted right by an XMM-register count, 4 shorts.
instruct vsra4S(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst arithmetically shifted right by an immI8 count, 4 shorts.
instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src arithmetically shifted right by an XMM-register count, 4 shorts.
instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src arithmetically shifted right by an immI8 count, 4 shorts.
instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst arithmetically shifted right by an XMM-register count, 8 shorts.
instruct vsra8S(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t!
arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst arithmetically shifted right by an immI8 count, 8 shorts.
instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src arithmetically shifted right by an XMM-register count, 8 shorts.
instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src arithmetically shifted right by an immI8 count, 8 shorts.
instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src arithmetically shifted right by an XMM-register count, 16 shorts.
instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src arithmetically shifted right by an immI8 count, 16 shorts.
instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t!
arithmetic right shift packed16S" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// Integers vector arithmetic right shift

// In-place form (vecD): dst arithmetically shifted right by an XMM-register count, 2 ints.
instruct vsra2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst arithmetically shifted right by an immI8 count, 2 ints.
instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src arithmetically shifted right by an XMM-register count, 2 ints.
instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src arithmetically shifted right by an immI8 count, 2 ints.
instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst arithmetically shifted right by an XMM-register count, 4 ints.
instruct vsra4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t!
arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst arithmetically shifted right by an immI8 count, 4 ints.
instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src arithmetically shifted right by an XMM-register count, 4 ints.
instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src arithmetically shifted right by an immI8 count, 4 ints.
instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src arithmetically shifted right by an XMM-register count, 8 ints.
instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src arithmetically shifted right by an immI8 count, 8 ints.
instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t!
arithmetic right shift packed8I" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// There are no longs vector arithmetic right shift instructions.


// --------------------------------- AND --------------------------------------

// In-place form (vecS): dst = dst AND src over a 4-byte vector.
instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecS): dst = src1 AND src2 over a 4-byte vector.
instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst = dst AND src over an 8-byte vector.
instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src1 AND src2 over an 8-byte vector.
instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t!
and vectors (8 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst = dst AND src over a 16-byte vector.
instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src1 AND src2 over a 16-byte vector.
instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src AND LoadVector(mem) over a 16-byte vector.
instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src1 AND src2 over a 32-byte vector.
instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t!
and vectors (32 bytes)" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src AND LoadVector(mem) over a 32-byte vector.
instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// --------------------------------- OR ---------------------------------------

// In-place form (vecS): dst = dst OR src over a 4-byte vector.
instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecS): dst = src1 OR src2 over a 4-byte vector.
instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecD): dst = dst OR src over an 8-byte vector.
instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecD): dst = src1 OR src2 over an 8-byte vector.
instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t!
or vectors (8 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// In-place form (vecX): dst = dst OR src over a 16-byte vector.
instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src1 OR src2 over a 16-byte vector.
instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0, vecX): dst = src OR LoadVector(mem) over a 16-byte vector.
instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src1 OR src2 over a 32-byte vector.
instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    const bool is_256bit = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 1, vecY): dst = src OR LoadVector(mem) over a 32-byte vector.
instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t!
or vectors (32 bytes)" %} 4712 ins_encode %{ 4713 bool vector256 = true; 4714 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4715 %} 4716 ins_pipe( pipe_slow ); 4717 %} 4718 4719 // --------------------------------- XOR -------------------------------------- 4720 4721 instruct vxor4B(vecS dst, vecS src) %{ 4722 predicate(n->as_Vector()->length_in_bytes() == 4); 4723 match(Set dst (XorV dst src)); 4724 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 4725 ins_encode %{ 4726 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 4727 %} 4728 ins_pipe( pipe_slow ); 4729 %} 4730 4731 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 4732 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 4733 match(Set dst (XorV src1 src2)); 4734 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 4735 ins_encode %{ 4736 bool vector256 = false; 4737 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4738 %} 4739 ins_pipe( pipe_slow ); 4740 %} 4741 4742 instruct vxor8B(vecD dst, vecD src) %{ 4743 predicate(n->as_Vector()->length_in_bytes() == 8); 4744 match(Set dst (XorV dst src)); 4745 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 4746 ins_encode %{ 4747 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 4748 %} 4749 ins_pipe( pipe_slow ); 4750 %} 4751 4752 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 4753 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 4754 match(Set dst (XorV src1 src2)); 4755 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 4756 ins_encode %{ 4757 bool vector256 = false; 4758 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4759 %} 4760 ins_pipe( pipe_slow ); 4761 %} 4762 4763 instruct vxor16B(vecX dst, vecX src) %{ 4764 predicate(n->as_Vector()->length_in_bytes() == 16); 4765 match(Set dst (XorV dst src)); 4766 format %{ "pxor $dst,$src\t! 
xor vectors (16 bytes)" %} 4767 ins_encode %{ 4768 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 4769 %} 4770 ins_pipe( pipe_slow ); 4771 %} 4772 4773 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 4774 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 4775 match(Set dst (XorV src1 src2)); 4776 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 4777 ins_encode %{ 4778 bool vector256 = false; 4779 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4780 %} 4781 ins_pipe( pipe_slow ); 4782 %} 4783 4784 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 4785 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 4786 match(Set dst (XorV src (LoadVector mem))); 4787 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 4788 ins_encode %{ 4789 bool vector256 = false; 4790 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4791 %} 4792 ins_pipe( pipe_slow ); 4793 %} 4794 4795 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 4796 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 4797 match(Set dst (XorV src1 src2)); 4798 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 4799 ins_encode %{ 4800 bool vector256 = true; 4801 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4802 %} 4803 ins_pipe( pipe_slow ); 4804 %} 4805 4806 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 4807 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 4808 match(Set dst (XorV src (LoadVector mem))); 4809 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 4810 ins_encode %{ 4811 bool vector256 = true; 4812 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4813 %} 4814 ins_pipe( pipe_slow ); 4815 %} 4816