//
// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

// Each XMM register is described as eight 32-bit slots (suffixes none,b..h)
// so the allocator can track float (1 slot), double (2 slots) and vector
// (4 or 8 slots) values that live in the same physical register.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));

#ifdef _WIN64

// Windows x64 ABI: XMM6-XMM15 are callee-saved, hence SOE here.
reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#else // _WIN64

// Linux/System V ABI: all XMM registers are caller-saved (SOC).
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

#ifdef _LP64

// XMM8-XMM15 only exist in 64-bit mode (require REX/VEX encoding).
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64

// Condition-code register; not addressable, so it gets a Bad VMReg slot.
// The encoding differs because fewer XMM registers precede it in 32-bit mode.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers (first 32-bit slot of each XMM register)
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers (first two 32-bit slots of each XMM register)
reg_class double_reg(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

// Helper queried by Compile::shorten_branches; x86 never needs call
// trampoline stubs, so both sizes are zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

// Emits and sizes the exception and deopt handler stubs that are appended
// to every compiled method.  The size_* methods must be upper bounds on
// what the corresponding emit_* methods produce (asserted at emit time).
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
// Returns the offset of the handler within the stub section,
// or 0 if the code cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  // Initially a plain jump; deoptimization may later patch it into a call
  // (see size_exception_handler above).
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
// Pushes the address of the handler itself ("the_pc") so the unpack blob
// knows where deoptimization was entered, then jumps to the unpack blob.
// Returns the offset of the handler, or 0 if the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  // 32-bit: a direct push of the current pc is encodable.
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
603 #ifdef _LP64 604 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 605 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 606 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 607 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 608 #else 609 static address float_signmask() { return (address)float_signmask_pool; } 610 static address float_signflip() { return (address)float_signflip_pool; } 611 static address double_signmask() { return (address)double_signmask_pool; } 612 static address double_signflip() { return (address)double_signflip_pool; } 613 #endif 614 615 616 const bool Matcher::match_rule_supported(int opcode) { 617 if (!has_match_rule(opcode)) 618 return false; 619 620 switch (opcode) { 621 case Op_PopCountI: 622 case Op_PopCountL: 623 if (!UsePopCountInstruction) 624 return false; 625 break; 626 case Op_MulVI: 627 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 628 return false; 629 break; 630 case Op_CompareAndSwapL: 631 #ifdef _LP64 632 case Op_CompareAndSwapP: 633 #endif 634 if (!VM_Version::supports_cx8()) 635 return false; 636 break; 637 } 638 639 return true; // Per default match rules are supported. 640 } 641 642 // Max vector size in bytes. 0 if not supported. 643 const int Matcher::vector_width_in_bytes(BasicType bt) { 644 assert(is_java_primitive(bt), "only primitive type vectors"); 645 if (UseSSE < 2) return 0; 646 // SSE2 supports 128bit vectors for all types. 647 // AVX2 supports 256bit vectors for all types. 648 int size = (UseAVX > 1) ? 32 : 16; 649 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 650 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 651 size = 32; 652 // Use flag to limit vector size. 653 size = MIN2(size,(int)MaxVectorSize); 654 // Minimum 2 values in vector (or 4 for bytes). 
655 switch (bt) { 656 case T_DOUBLE: 657 case T_LONG: 658 if (size < 16) return 0; 659 case T_FLOAT: 660 case T_INT: 661 if (size < 8) return 0; 662 case T_BOOLEAN: 663 case T_BYTE: 664 case T_CHAR: 665 case T_SHORT: 666 if (size < 4) return 0; 667 break; 668 default: 669 ShouldNotReachHere(); 670 } 671 return size; 672 } 673 674 // Limits on vector size (number of elements) loaded into vector. 675 const int Matcher::max_vector_size(const BasicType bt) { 676 return vector_width_in_bytes(bt)/type2aelembytes(bt); 677 } 678 const int Matcher::min_vector_size(const BasicType bt) { 679 int max_size = max_vector_size(bt); 680 // Min size which can be loaded into vector is 4 bytes. 681 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 682 return MIN2(size,max_size); 683 } 684 685 // Vector ideal reg corresponding to specidied size in bytes 686 const int Matcher::vector_ideal_reg(int size) { 687 assert(MaxVectorSize >= size, ""); 688 switch(size) { 689 case 4: return Op_VecS; 690 case 8: return Op_VecD; 691 case 16: return Op_VecX; 692 case 32: return Op_VecY; 693 } 694 ShouldNotReachHere(); 695 return 0; 696 } 697 698 // Only lowest bits of xmm reg are used for vector shift count. 699 const int Matcher::vector_shift_count_ideal_reg(int size) { 700 return Op_VecS; 701 } 702 703 // x86 supports misaligned vectors store/load. 704 const bool Matcher::misaligned_vectors_ok() { 705 return !AlignVector; // can be changed by flag 706 } 707 708 // x86 AES instructions are compatible with SunJCE expanded 709 // keys, hence we do not need to pass the original key to stubs 710 const bool Matcher::pass_original_key_for_aes() { 711 return false; 712 } 713 714 // Helper methods for MachSpillCopyNode::implementation(). 715 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 716 int src_hi, int dst_hi, uint ireg, outputStream* st) { 717 // In 64-bit VM size calculation is very complex. 
Emitting instructions 718 // into scratch buffer is used to get size in 64-bit VM. 719 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 720 assert(ireg == Op_VecS || // 32bit vector 721 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 722 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 723 "no non-adjacent vector moves" ); 724 if (cbuf) { 725 MacroAssembler _masm(cbuf); 726 int offset = __ offset(); 727 switch (ireg) { 728 case Op_VecS: // copy whole register 729 case Op_VecD: 730 case Op_VecX: 731 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 732 break; 733 case Op_VecY: 734 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 735 break; 736 default: 737 ShouldNotReachHere(); 738 } 739 int size = __ offset() - offset; 740 #ifdef ASSERT 741 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 742 assert(!do_size || size == 4, "incorrect size calculattion"); 743 #endif 744 return size; 745 #ifndef PRODUCT 746 } else if (!do_size) { 747 switch (ireg) { 748 case Op_VecS: 749 case Op_VecD: 750 case Op_VecX: 751 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 752 break; 753 case Op_VecY: 754 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 755 break; 756 default: 757 ShouldNotReachHere(); 758 } 759 #endif 760 } 761 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 762 return 4; 763 } 764 765 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 766 int stack_offset, int reg, uint ireg, outputStream* st) { 767 // In 64-bit VM size calculation is very complex. Emitting instructions 768 // into scratch buffer is used to get size in 64-bit VM. 
769 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 770 if (cbuf) { 771 MacroAssembler _masm(cbuf); 772 int offset = __ offset(); 773 if (is_load) { 774 switch (ireg) { 775 case Op_VecS: 776 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 777 break; 778 case Op_VecD: 779 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 780 break; 781 case Op_VecX: 782 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 783 break; 784 case Op_VecY: 785 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 786 break; 787 default: 788 ShouldNotReachHere(); 789 } 790 } else { // store 791 switch (ireg) { 792 case Op_VecS: 793 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 794 break; 795 case Op_VecD: 796 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 797 break; 798 case Op_VecX: 799 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 800 break; 801 case Op_VecY: 802 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 803 break; 804 default: 805 ShouldNotReachHere(); 806 } 807 } 808 int size = __ offset() - offset; 809 #ifdef ASSERT 810 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 811 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
812 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 813 #endif 814 return size; 815 #ifndef PRODUCT 816 } else if (!do_size) { 817 if (is_load) { 818 switch (ireg) { 819 case Op_VecS: 820 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 821 break; 822 case Op_VecD: 823 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 824 break; 825 case Op_VecX: 826 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 827 break; 828 case Op_VecY: 829 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 830 break; 831 default: 832 ShouldNotReachHere(); 833 } 834 } else { // store 835 switch (ireg) { 836 case Op_VecS: 837 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 838 break; 839 case Op_VecD: 840 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 841 break; 842 case Op_VecX: 843 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 844 break; 845 case Op_VecY: 846 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 847 break; 848 default: 849 ShouldNotReachHere(); 850 } 851 } 852 #endif 853 } 854 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 855 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 856 return 5+offset_size; 857 } 858 859 static inline jfloat replicate4_imm(int con, int width) { 860 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 
861 assert(width == 1 || width == 2, "only byte or short types here"); 862 int bit_width = width * 8; 863 jint val = con; 864 val &= (1 << bit_width) - 1; // mask off sign bits 865 while(bit_width < 32) { 866 val |= (val << bit_width); 867 bit_width <<= 1; 868 } 869 jfloat fval = *((jfloat*) &val); // coerce to float type 870 return fval; 871 } 872 873 static inline jdouble replicate8_imm(int con, int width) { 874 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 875 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 876 int bit_width = width * 8; 877 jlong val = con; 878 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 879 while(bit_width < 64) { 880 val |= (val << bit_width); 881 bit_width <<= 1; 882 } 883 jdouble dval = *((jdouble*) &val); // coerce to double type 884 return dval; 885 } 886 887 #ifndef PRODUCT 888 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 889 st->print("nop \t# %d bytes pad for loops and calls", _count); 890 } 891 #endif 892 893 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 894 MacroAssembler _masm(&cbuf); 895 __ nop(_count); 896 } 897 898 uint MachNopNode::size(PhaseRegAlloc*) const { 899 return _count; 900 } 901 902 #ifndef PRODUCT 903 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 904 st->print("# breakpoint"); 905 } 906 #endif 907 908 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 909 MacroAssembler _masm(&cbuf); 910 __ int3(); 911 } 912 913 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 914 return MachNode::size(ra_); 915 } 916 917 %} 918 919 encode %{ 920 921 enc_class call_epilog %{ 922 if (VerifyStackAtCalls) { 923 // Check that stack depth is unchanged: find majik cookie on stack 924 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 925 MacroAssembler _masm(&cbuf); 926 Label L; 927 __ 
cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 928 __ jccb(Assembler::equal, L); 929 // Die if stack mismatch 930 __ int3(); 931 __ bind(L); 932 } 933 %} 934 935 %} 936 937 938 //----------OPERANDS----------------------------------------------------------- 939 // Operand definitions must precede instruction definitions for correct parsing 940 // in the ADLC because operands constitute user defined types which are used in 941 // instruction definitions. 942 943 // Vectors 944 operand vecS() %{ 945 constraint(ALLOC_IN_RC(vectors_reg)); 946 match(VecS); 947 948 format %{ %} 949 interface(REG_INTER); 950 %} 951 952 operand vecD() %{ 953 constraint(ALLOC_IN_RC(vectord_reg)); 954 match(VecD); 955 956 format %{ %} 957 interface(REG_INTER); 958 %} 959 960 operand vecX() %{ 961 constraint(ALLOC_IN_RC(vectorx_reg)); 962 match(VecX); 963 964 format %{ %} 965 interface(REG_INTER); 966 %} 967 968 operand vecY() %{ 969 constraint(ALLOC_IN_RC(vectory_reg)); 970 match(VecY); 971 972 format %{ %} 973 interface(REG_INTER); 974 %} 975 976 977 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 978 979 // ============================================================================ 980 981 instruct ShouldNotReachHere() %{ 982 match(Halt); 983 format %{ "int3\t# ShouldNotReachHere" %} 984 ins_encode %{ 985 __ int3(); 986 %} 987 ins_pipe(pipe_slow); 988 %} 989 990 // ============================================================================ 991 992 instruct addF_reg(regF dst, regF src) %{ 993 predicate((UseSSE>=1) && (UseAVX == 0)); 994 match(Set dst (AddF dst src)); 995 996 format %{ "addss $dst, $src" %} 997 ins_cost(150); 998 ins_encode %{ 999 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1000 %} 1001 ins_pipe(pipe_slow); 1002 %} 1003 1004 instruct addF_mem(regF dst, memory src) %{ 1005 predicate((UseSSE>=1) && (UseAVX == 0)); 1006 match(Set dst (AddF dst (LoadF src))); 1007 1008 format %{ "addss $dst, $src" %} 1009 ins_cost(150); 1010 ins_encode 
%{ 1011 __ addss($dst$$XMMRegister, $src$$Address); 1012 %} 1013 ins_pipe(pipe_slow); 1014 %} 1015 1016 instruct addF_imm(regF dst, immF con) %{ 1017 predicate((UseSSE>=1) && (UseAVX == 0)); 1018 match(Set dst (AddF dst con)); 1019 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1020 ins_cost(150); 1021 ins_encode %{ 1022 __ addss($dst$$XMMRegister, $constantaddress($con)); 1023 %} 1024 ins_pipe(pipe_slow); 1025 %} 1026 1027 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1028 predicate(UseAVX > 0); 1029 match(Set dst (AddF src1 src2)); 1030 1031 format %{ "vaddss $dst, $src1, $src2" %} 1032 ins_cost(150); 1033 ins_encode %{ 1034 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1035 %} 1036 ins_pipe(pipe_slow); 1037 %} 1038 1039 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1040 predicate(UseAVX > 0); 1041 match(Set dst (AddF src1 (LoadF src2))); 1042 1043 format %{ "vaddss $dst, $src1, $src2" %} 1044 ins_cost(150); 1045 ins_encode %{ 1046 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1047 %} 1048 ins_pipe(pipe_slow); 1049 %} 1050 1051 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1052 predicate(UseAVX > 0); 1053 match(Set dst (AddF src con)); 1054 1055 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1056 ins_cost(150); 1057 ins_encode %{ 1058 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1059 %} 1060 ins_pipe(pipe_slow); 1061 %} 1062 1063 instruct addD_reg(regD dst, regD src) %{ 1064 predicate((UseSSE>=2) && (UseAVX == 0)); 1065 match(Set dst (AddD dst src)); 1066 1067 format %{ "addsd $dst, $src" %} 1068 ins_cost(150); 1069 ins_encode %{ 1070 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1071 %} 1072 ins_pipe(pipe_slow); 1073 %} 1074 1075 instruct addD_mem(regD dst, memory src) %{ 1076 predicate((UseSSE>=2) && (UseAVX == 0)); 1077 match(Set dst (AddD dst (LoadD src))); 1078 1079 
format %{ "addsd $dst, $src" %} 1080 ins_cost(150); 1081 ins_encode %{ 1082 __ addsd($dst$$XMMRegister, $src$$Address); 1083 %} 1084 ins_pipe(pipe_slow); 1085 %} 1086 1087 instruct addD_imm(regD dst, immD con) %{ 1088 predicate((UseSSE>=2) && (UseAVX == 0)); 1089 match(Set dst (AddD dst con)); 1090 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1091 ins_cost(150); 1092 ins_encode %{ 1093 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1094 %} 1095 ins_pipe(pipe_slow); 1096 %} 1097 1098 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1099 predicate(UseAVX > 0); 1100 match(Set dst (AddD src1 src2)); 1101 1102 format %{ "vaddsd $dst, $src1, $src2" %} 1103 ins_cost(150); 1104 ins_encode %{ 1105 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1106 %} 1107 ins_pipe(pipe_slow); 1108 %} 1109 1110 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1111 predicate(UseAVX > 0); 1112 match(Set dst (AddD src1 (LoadD src2))); 1113 1114 format %{ "vaddsd $dst, $src1, $src2" %} 1115 ins_cost(150); 1116 ins_encode %{ 1117 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1118 %} 1119 ins_pipe(pipe_slow); 1120 %} 1121 1122 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1123 predicate(UseAVX > 0); 1124 match(Set dst (AddD src con)); 1125 1126 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1127 ins_cost(150); 1128 ins_encode %{ 1129 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1130 %} 1131 ins_pipe(pipe_slow); 1132 %} 1133 1134 instruct subF_reg(regF dst, regF src) %{ 1135 predicate((UseSSE>=1) && (UseAVX == 0)); 1136 match(Set dst (SubF dst src)); 1137 1138 format %{ "subss $dst, $src" %} 1139 ins_cost(150); 1140 ins_encode %{ 1141 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1142 %} 1143 ins_pipe(pipe_slow); 1144 %} 1145 1146 instruct subF_mem(regF dst, memory src) %{ 1147 predicate((UseSSE>=1) && 
(UseAVX == 0)); 1148 match(Set dst (SubF dst (LoadF src))); 1149 1150 format %{ "subss $dst, $src" %} 1151 ins_cost(150); 1152 ins_encode %{ 1153 __ subss($dst$$XMMRegister, $src$$Address); 1154 %} 1155 ins_pipe(pipe_slow); 1156 %} 1157 1158 instruct subF_imm(regF dst, immF con) %{ 1159 predicate((UseSSE>=1) && (UseAVX == 0)); 1160 match(Set dst (SubF dst con)); 1161 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1162 ins_cost(150); 1163 ins_encode %{ 1164 __ subss($dst$$XMMRegister, $constantaddress($con)); 1165 %} 1166 ins_pipe(pipe_slow); 1167 %} 1168 1169 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 1170 predicate(UseAVX > 0); 1171 match(Set dst (SubF src1 src2)); 1172 1173 format %{ "vsubss $dst, $src1, $src2" %} 1174 ins_cost(150); 1175 ins_encode %{ 1176 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1177 %} 1178 ins_pipe(pipe_slow); 1179 %} 1180 1181 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 1182 predicate(UseAVX > 0); 1183 match(Set dst (SubF src1 (LoadF src2))); 1184 1185 format %{ "vsubss $dst, $src1, $src2" %} 1186 ins_cost(150); 1187 ins_encode %{ 1188 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1189 %} 1190 ins_pipe(pipe_slow); 1191 %} 1192 1193 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 1194 predicate(UseAVX > 0); 1195 match(Set dst (SubF src con)); 1196 1197 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1198 ins_cost(150); 1199 ins_encode %{ 1200 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1201 %} 1202 ins_pipe(pipe_slow); 1203 %} 1204 1205 instruct subD_reg(regD dst, regD src) %{ 1206 predicate((UseSSE>=2) && (UseAVX == 0)); 1207 match(Set dst (SubD dst src)); 1208 1209 format %{ "subsd $dst, $src" %} 1210 ins_cost(150); 1211 ins_encode %{ 1212 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 1213 %} 1214 ins_pipe(pipe_slow); 1215 %} 1216 1217 instruct 
subD_mem(regD dst, memory src) %{ 1218 predicate((UseSSE>=2) && (UseAVX == 0)); 1219 match(Set dst (SubD dst (LoadD src))); 1220 1221 format %{ "subsd $dst, $src" %} 1222 ins_cost(150); 1223 ins_encode %{ 1224 __ subsd($dst$$XMMRegister, $src$$Address); 1225 %} 1226 ins_pipe(pipe_slow); 1227 %} 1228 1229 instruct subD_imm(regD dst, immD con) %{ 1230 predicate((UseSSE>=2) && (UseAVX == 0)); 1231 match(Set dst (SubD dst con)); 1232 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1233 ins_cost(150); 1234 ins_encode %{ 1235 __ subsd($dst$$XMMRegister, $constantaddress($con)); 1236 %} 1237 ins_pipe(pipe_slow); 1238 %} 1239 1240 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 1241 predicate(UseAVX > 0); 1242 match(Set dst (SubD src1 src2)); 1243 1244 format %{ "vsubsd $dst, $src1, $src2" %} 1245 ins_cost(150); 1246 ins_encode %{ 1247 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1248 %} 1249 ins_pipe(pipe_slow); 1250 %} 1251 1252 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 1253 predicate(UseAVX > 0); 1254 match(Set dst (SubD src1 (LoadD src2))); 1255 1256 format %{ "vsubsd $dst, $src1, $src2" %} 1257 ins_cost(150); 1258 ins_encode %{ 1259 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1260 %} 1261 ins_pipe(pipe_slow); 1262 %} 1263 1264 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 1265 predicate(UseAVX > 0); 1266 match(Set dst (SubD src con)); 1267 1268 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1269 ins_cost(150); 1270 ins_encode %{ 1271 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1272 %} 1273 ins_pipe(pipe_slow); 1274 %} 1275 1276 instruct mulF_reg(regF dst, regF src) %{ 1277 predicate((UseSSE>=1) && (UseAVX == 0)); 1278 match(Set dst (MulF dst src)); 1279 1280 format %{ "mulss $dst, $src" %} 1281 ins_cost(150); 1282 ins_encode %{ 1283 __ mulss($dst$$XMMRegister, 
$src$$XMMRegister); 1284 %} 1285 ins_pipe(pipe_slow); 1286 %} 1287 1288 instruct mulF_mem(regF dst, memory src) %{ 1289 predicate((UseSSE>=1) && (UseAVX == 0)); 1290 match(Set dst (MulF dst (LoadF src))); 1291 1292 format %{ "mulss $dst, $src" %} 1293 ins_cost(150); 1294 ins_encode %{ 1295 __ mulss($dst$$XMMRegister, $src$$Address); 1296 %} 1297 ins_pipe(pipe_slow); 1298 %} 1299 1300 instruct mulF_imm(regF dst, immF con) %{ 1301 predicate((UseSSE>=1) && (UseAVX == 0)); 1302 match(Set dst (MulF dst con)); 1303 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1304 ins_cost(150); 1305 ins_encode %{ 1306 __ mulss($dst$$XMMRegister, $constantaddress($con)); 1307 %} 1308 ins_pipe(pipe_slow); 1309 %} 1310 1311 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 1312 predicate(UseAVX > 0); 1313 match(Set dst (MulF src1 src2)); 1314 1315 format %{ "vmulss $dst, $src1, $src2" %} 1316 ins_cost(150); 1317 ins_encode %{ 1318 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1319 %} 1320 ins_pipe(pipe_slow); 1321 %} 1322 1323 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 1324 predicate(UseAVX > 0); 1325 match(Set dst (MulF src1 (LoadF src2))); 1326 1327 format %{ "vmulss $dst, $src1, $src2" %} 1328 ins_cost(150); 1329 ins_encode %{ 1330 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1331 %} 1332 ins_pipe(pipe_slow); 1333 %} 1334 1335 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 1336 predicate(UseAVX > 0); 1337 match(Set dst (MulF src con)); 1338 1339 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1340 ins_cost(150); 1341 ins_encode %{ 1342 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1343 %} 1344 ins_pipe(pipe_slow); 1345 %} 1346 1347 instruct mulD_reg(regD dst, regD src) %{ 1348 predicate((UseSSE>=2) && (UseAVX == 0)); 1349 match(Set dst (MulD dst src)); 1350 1351 format %{ "mulsd $dst, $src" %} 1352 
ins_cost(150); 1353 ins_encode %{ 1354 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 1355 %} 1356 ins_pipe(pipe_slow); 1357 %} 1358 1359 instruct mulD_mem(regD dst, memory src) %{ 1360 predicate((UseSSE>=2) && (UseAVX == 0)); 1361 match(Set dst (MulD dst (LoadD src))); 1362 1363 format %{ "mulsd $dst, $src" %} 1364 ins_cost(150); 1365 ins_encode %{ 1366 __ mulsd($dst$$XMMRegister, $src$$Address); 1367 %} 1368 ins_pipe(pipe_slow); 1369 %} 1370 1371 instruct mulD_imm(regD dst, immD con) %{ 1372 predicate((UseSSE>=2) && (UseAVX == 0)); 1373 match(Set dst (MulD dst con)); 1374 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1375 ins_cost(150); 1376 ins_encode %{ 1377 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 1378 %} 1379 ins_pipe(pipe_slow); 1380 %} 1381 1382 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 1383 predicate(UseAVX > 0); 1384 match(Set dst (MulD src1 src2)); 1385 1386 format %{ "vmulsd $dst, $src1, $src2" %} 1387 ins_cost(150); 1388 ins_encode %{ 1389 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1390 %} 1391 ins_pipe(pipe_slow); 1392 %} 1393 1394 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 1395 predicate(UseAVX > 0); 1396 match(Set dst (MulD src1 (LoadD src2))); 1397 1398 format %{ "vmulsd $dst, $src1, $src2" %} 1399 ins_cost(150); 1400 ins_encode %{ 1401 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1402 %} 1403 ins_pipe(pipe_slow); 1404 %} 1405 1406 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 1407 predicate(UseAVX > 0); 1408 match(Set dst (MulD src con)); 1409 1410 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1411 ins_cost(150); 1412 ins_encode %{ 1413 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1414 %} 1415 ins_pipe(pipe_slow); 1416 %} 1417 1418 instruct divF_reg(regF dst, regF src) %{ 1419 predicate((UseSSE>=1) && (UseAVX == 0)); 1420 match(Set 
dst (DivF dst src)); 1421 1422 format %{ "divss $dst, $src" %} 1423 ins_cost(150); 1424 ins_encode %{ 1425 __ divss($dst$$XMMRegister, $src$$XMMRegister); 1426 %} 1427 ins_pipe(pipe_slow); 1428 %} 1429 1430 instruct divF_mem(regF dst, memory src) %{ 1431 predicate((UseSSE>=1) && (UseAVX == 0)); 1432 match(Set dst (DivF dst (LoadF src))); 1433 1434 format %{ "divss $dst, $src" %} 1435 ins_cost(150); 1436 ins_encode %{ 1437 __ divss($dst$$XMMRegister, $src$$Address); 1438 %} 1439 ins_pipe(pipe_slow); 1440 %} 1441 1442 instruct divF_imm(regF dst, immF con) %{ 1443 predicate((UseSSE>=1) && (UseAVX == 0)); 1444 match(Set dst (DivF dst con)); 1445 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1446 ins_cost(150); 1447 ins_encode %{ 1448 __ divss($dst$$XMMRegister, $constantaddress($con)); 1449 %} 1450 ins_pipe(pipe_slow); 1451 %} 1452 1453 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 1454 predicate(UseAVX > 0); 1455 match(Set dst (DivF src1 src2)); 1456 1457 format %{ "vdivss $dst, $src1, $src2" %} 1458 ins_cost(150); 1459 ins_encode %{ 1460 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1461 %} 1462 ins_pipe(pipe_slow); 1463 %} 1464 1465 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 1466 predicate(UseAVX > 0); 1467 match(Set dst (DivF src1 (LoadF src2))); 1468 1469 format %{ "vdivss $dst, $src1, $src2" %} 1470 ins_cost(150); 1471 ins_encode %{ 1472 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1473 %} 1474 ins_pipe(pipe_slow); 1475 %} 1476 1477 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 1478 predicate(UseAVX > 0); 1479 match(Set dst (DivF src con)); 1480 1481 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1482 ins_cost(150); 1483 ins_encode %{ 1484 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1485 %} 1486 ins_pipe(pipe_slow); 1487 %} 1488 1489 instruct divD_reg(regD dst, regD src) 
%{ 1490 predicate((UseSSE>=2) && (UseAVX == 0)); 1491 match(Set dst (DivD dst src)); 1492 1493 format %{ "divsd $dst, $src" %} 1494 ins_cost(150); 1495 ins_encode %{ 1496 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 1497 %} 1498 ins_pipe(pipe_slow); 1499 %} 1500 1501 instruct divD_mem(regD dst, memory src) %{ 1502 predicate((UseSSE>=2) && (UseAVX == 0)); 1503 match(Set dst (DivD dst (LoadD src))); 1504 1505 format %{ "divsd $dst, $src" %} 1506 ins_cost(150); 1507 ins_encode %{ 1508 __ divsd($dst$$XMMRegister, $src$$Address); 1509 %} 1510 ins_pipe(pipe_slow); 1511 %} 1512 1513 instruct divD_imm(regD dst, immD con) %{ 1514 predicate((UseSSE>=2) && (UseAVX == 0)); 1515 match(Set dst (DivD dst con)); 1516 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1517 ins_cost(150); 1518 ins_encode %{ 1519 __ divsd($dst$$XMMRegister, $constantaddress($con)); 1520 %} 1521 ins_pipe(pipe_slow); 1522 %} 1523 1524 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 1525 predicate(UseAVX > 0); 1526 match(Set dst (DivD src1 src2)); 1527 1528 format %{ "vdivsd $dst, $src1, $src2" %} 1529 ins_cost(150); 1530 ins_encode %{ 1531 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1532 %} 1533 ins_pipe(pipe_slow); 1534 %} 1535 1536 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 1537 predicate(UseAVX > 0); 1538 match(Set dst (DivD src1 (LoadD src2))); 1539 1540 format %{ "vdivsd $dst, $src1, $src2" %} 1541 ins_cost(150); 1542 ins_encode %{ 1543 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1544 %} 1545 ins_pipe(pipe_slow); 1546 %} 1547 1548 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 1549 predicate(UseAVX > 0); 1550 match(Set dst (DivD src con)); 1551 1552 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1553 ins_cost(150); 1554 ins_encode %{ 1555 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1556 %} 1557 
ins_pipe(pipe_slow); 1558 %} 1559 1560 instruct absF_reg(regF dst) %{ 1561 predicate((UseSSE>=1) && (UseAVX == 0)); 1562 match(Set dst (AbsF dst)); 1563 ins_cost(150); 1564 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1565 ins_encode %{ 1566 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1567 %} 1568 ins_pipe(pipe_slow); 1569 %} 1570 1571 instruct absF_reg_reg(regF dst, regF src) %{ 1572 predicate(UseAVX > 0); 1573 match(Set dst (AbsF src)); 1574 ins_cost(150); 1575 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 1576 ins_encode %{ 1577 bool vector256 = false; 1578 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1579 ExternalAddress(float_signmask()), vector256); 1580 %} 1581 ins_pipe(pipe_slow); 1582 %} 1583 1584 instruct absD_reg(regD dst) %{ 1585 predicate((UseSSE>=2) && (UseAVX == 0)); 1586 match(Set dst (AbsD dst)); 1587 ins_cost(150); 1588 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1589 "# abs double by sign masking" %} 1590 ins_encode %{ 1591 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1592 %} 1593 ins_pipe(pipe_slow); 1594 %} 1595 1596 instruct absD_reg_reg(regD dst, regD src) %{ 1597 predicate(UseAVX > 0); 1598 match(Set dst (AbsD src)); 1599 ins_cost(150); 1600 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1601 "# abs double by sign masking" %} 1602 ins_encode %{ 1603 bool vector256 = false; 1604 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1605 ExternalAddress(double_signmask()), vector256); 1606 %} 1607 ins_pipe(pipe_slow); 1608 %} 1609 1610 instruct negF_reg(regF dst) %{ 1611 predicate((UseSSE>=1) && (UseAVX == 0)); 1612 match(Set dst (NegF dst)); 1613 ins_cost(150); 1614 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1615 ins_encode %{ 1616 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1617 %} 1618 ins_pipe(pipe_slow); 1619 %} 1620 1621 instruct negF_reg_reg(regF dst, regF src) %{ 1622 predicate(UseAVX > 0); 
1623 match(Set dst (NegF src)); 1624 ins_cost(150); 1625 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1626 ins_encode %{ 1627 bool vector256 = false; 1628 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1629 ExternalAddress(float_signflip()), vector256); 1630 %} 1631 ins_pipe(pipe_slow); 1632 %} 1633 1634 instruct negD_reg(regD dst) %{ 1635 predicate((UseSSE>=2) && (UseAVX == 0)); 1636 match(Set dst (NegD dst)); 1637 ins_cost(150); 1638 format %{ "xorpd $dst, [0x8000000000000000]\t" 1639 "# neg double by sign flipping" %} 1640 ins_encode %{ 1641 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1642 %} 1643 ins_pipe(pipe_slow); 1644 %} 1645 1646 instruct negD_reg_reg(regD dst, regD src) %{ 1647 predicate(UseAVX > 0); 1648 match(Set dst (NegD src)); 1649 ins_cost(150); 1650 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1651 "# neg double by sign flipping" %} 1652 ins_encode %{ 1653 bool vector256 = false; 1654 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1655 ExternalAddress(double_signflip()), vector256); 1656 %} 1657 ins_pipe(pipe_slow); 1658 %} 1659 1660 instruct sqrtF_reg(regF dst, regF src) %{ 1661 predicate(UseSSE>=1); 1662 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1663 1664 format %{ "sqrtss $dst, $src" %} 1665 ins_cost(150); 1666 ins_encode %{ 1667 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1668 %} 1669 ins_pipe(pipe_slow); 1670 %} 1671 1672 instruct sqrtF_mem(regF dst, memory src) %{ 1673 predicate(UseSSE>=1); 1674 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 1675 1676 format %{ "sqrtss $dst, $src" %} 1677 ins_cost(150); 1678 ins_encode %{ 1679 __ sqrtss($dst$$XMMRegister, $src$$Address); 1680 %} 1681 ins_pipe(pipe_slow); 1682 %} 1683 1684 instruct sqrtF_imm(regF dst, immF con) %{ 1685 predicate(UseSSE>=1); 1686 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 1687 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1688 ins_cost(150); 1689 
ins_encode %{ 1690 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 1691 %} 1692 ins_pipe(pipe_slow); 1693 %} 1694 1695 instruct sqrtD_reg(regD dst, regD src) %{ 1696 predicate(UseSSE>=2); 1697 match(Set dst (SqrtD src)); 1698 1699 format %{ "sqrtsd $dst, $src" %} 1700 ins_cost(150); 1701 ins_encode %{ 1702 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 1703 %} 1704 ins_pipe(pipe_slow); 1705 %} 1706 1707 instruct sqrtD_mem(regD dst, memory src) %{ 1708 predicate(UseSSE>=2); 1709 match(Set dst (SqrtD (LoadD src))); 1710 1711 format %{ "sqrtsd $dst, $src" %} 1712 ins_cost(150); 1713 ins_encode %{ 1714 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1715 %} 1716 ins_pipe(pipe_slow); 1717 %} 1718 1719 instruct sqrtD_imm(regD dst, immD con) %{ 1720 predicate(UseSSE>=2); 1721 match(Set dst (SqrtD con)); 1722 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1723 ins_cost(150); 1724 ins_encode %{ 1725 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1726 %} 1727 ins_pipe(pipe_slow); 1728 %} 1729 1730 1731 // ====================VECTOR INSTRUCTIONS===================================== 1732 1733 // Load vectors (4 bytes long) 1734 instruct loadV4(vecS dst, memory mem) %{ 1735 predicate(n->as_LoadVector()->memory_size() == 4); 1736 match(Set dst (LoadVector mem)); 1737 ins_cost(125); 1738 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1739 ins_encode %{ 1740 __ movdl($dst$$XMMRegister, $mem$$Address); 1741 %} 1742 ins_pipe( pipe_slow ); 1743 %} 1744 1745 // Load vectors (8 bytes long) 1746 instruct loadV8(vecD dst, memory mem) %{ 1747 predicate(n->as_LoadVector()->memory_size() == 8); 1748 match(Set dst (LoadVector mem)); 1749 ins_cost(125); 1750 format %{ "movq $dst,$mem\t! 
load vector (8 bytes)" %} 1751 ins_encode %{ 1752 __ movq($dst$$XMMRegister, $mem$$Address); 1753 %} 1754 ins_pipe( pipe_slow ); 1755 %} 1756 1757 // Load vectors (16 bytes long) 1758 instruct loadV16(vecX dst, memory mem) %{ 1759 predicate(n->as_LoadVector()->memory_size() == 16); 1760 match(Set dst (LoadVector mem)); 1761 ins_cost(125); 1762 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 1763 ins_encode %{ 1764 __ movdqu($dst$$XMMRegister, $mem$$Address); 1765 %} 1766 ins_pipe( pipe_slow ); 1767 %} 1768 1769 // Load vectors (32 bytes long) 1770 instruct loadV32(vecY dst, memory mem) %{ 1771 predicate(n->as_LoadVector()->memory_size() == 32); 1772 match(Set dst (LoadVector mem)); 1773 ins_cost(125); 1774 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 1775 ins_encode %{ 1776 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 1777 %} 1778 ins_pipe( pipe_slow ); 1779 %} 1780 1781 // Store vectors 1782 instruct storeV4(memory mem, vecS src) %{ 1783 predicate(n->as_StoreVector()->memory_size() == 4); 1784 match(Set mem (StoreVector mem src)); 1785 ins_cost(145); 1786 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 1787 ins_encode %{ 1788 __ movdl($mem$$Address, $src$$XMMRegister); 1789 %} 1790 ins_pipe( pipe_slow ); 1791 %} 1792 1793 instruct storeV8(memory mem, vecD src) %{ 1794 predicate(n->as_StoreVector()->memory_size() == 8); 1795 match(Set mem (StoreVector mem src)); 1796 ins_cost(145); 1797 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1798 ins_encode %{ 1799 __ movq($mem$$Address, $src$$XMMRegister); 1800 %} 1801 ins_pipe( pipe_slow ); 1802 %} 1803 1804 instruct storeV16(memory mem, vecX src) %{ 1805 predicate(n->as_StoreVector()->memory_size() == 16); 1806 match(Set mem (StoreVector mem src)); 1807 ins_cost(145); 1808 format %{ "movdqu $mem,$src\t! 
store vector (16 bytes)" %} 1809 ins_encode %{ 1810 __ movdqu($mem$$Address, $src$$XMMRegister); 1811 %} 1812 ins_pipe( pipe_slow ); 1813 %} 1814 1815 instruct storeV32(memory mem, vecY src) %{ 1816 predicate(n->as_StoreVector()->memory_size() == 32); 1817 match(Set mem (StoreVector mem src)); 1818 ins_cost(145); 1819 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1820 ins_encode %{ 1821 __ vmovdqu($mem$$Address, $src$$XMMRegister); 1822 %} 1823 ins_pipe( pipe_slow ); 1824 %} 1825 1826 // Replicate byte scalar to be vector 1827 instruct Repl4B(vecS dst, rRegI src) %{ 1828 predicate(n->as_Vector()->length() == 4); 1829 match(Set dst (ReplicateB src)); 1830 format %{ "movd $dst,$src\n\t" 1831 "punpcklbw $dst,$dst\n\t" 1832 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 1833 ins_encode %{ 1834 __ movdl($dst$$XMMRegister, $src$$Register); 1835 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1836 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1837 %} 1838 ins_pipe( pipe_slow ); 1839 %} 1840 1841 instruct Repl8B(vecD dst, rRegI src) %{ 1842 predicate(n->as_Vector()->length() == 8); 1843 match(Set dst (ReplicateB src)); 1844 format %{ "movd $dst,$src\n\t" 1845 "punpcklbw $dst,$dst\n\t" 1846 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1847 ins_encode %{ 1848 __ movdl($dst$$XMMRegister, $src$$Register); 1849 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1850 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1851 %} 1852 ins_pipe( pipe_slow ); 1853 %} 1854 1855 instruct Repl16B(vecX dst, rRegI src) %{ 1856 predicate(n->as_Vector()->length() == 16); 1857 match(Set dst (ReplicateB src)); 1858 format %{ "movd $dst,$src\n\t" 1859 "punpcklbw $dst,$dst\n\t" 1860 "pshuflw $dst,$dst,0x00\n\t" 1861 "punpcklqdq $dst,$dst\t! 
replicate16B" %} 1862 ins_encode %{ 1863 __ movdl($dst$$XMMRegister, $src$$Register); 1864 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1865 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1866 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1867 %} 1868 ins_pipe( pipe_slow ); 1869 %} 1870 1871 instruct Repl32B(vecY dst, rRegI src) %{ 1872 predicate(n->as_Vector()->length() == 32); 1873 match(Set dst (ReplicateB src)); 1874 format %{ "movd $dst,$src\n\t" 1875 "punpcklbw $dst,$dst\n\t" 1876 "pshuflw $dst,$dst,0x00\n\t" 1877 "punpcklqdq $dst,$dst\n\t" 1878 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 1879 ins_encode %{ 1880 __ movdl($dst$$XMMRegister, $src$$Register); 1881 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1882 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1883 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1884 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1885 %} 1886 ins_pipe( pipe_slow ); 1887 %} 1888 1889 // Replicate byte scalar immediate to be vector by loading from const table. 1890 instruct Repl4B_imm(vecS dst, immI con) %{ 1891 predicate(n->as_Vector()->length() == 4); 1892 match(Set dst (ReplicateB con)); 1893 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1894 ins_encode %{ 1895 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1896 %} 1897 ins_pipe( pipe_slow ); 1898 %} 1899 1900 instruct Repl8B_imm(vecD dst, immI con) %{ 1901 predicate(n->as_Vector()->length() == 8); 1902 match(Set dst (ReplicateB con)); 1903 format %{ "movq $dst,[$constantaddress]\t! 
replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    // Broadcast the 8-byte immediate pattern, then duplicate the low qword.
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Fixed format text: was "lreplicate32B($con)" — stray leading 'l', inconsistent
// with the replicateNB naming used by every sibling rule.
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    // Fill the low 128 bits with the byte pattern, then copy them into the
    // high 128-bit lane of the YMM register.
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t!
replicate8B zero" %} 1951 ins_encode %{ 1952 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1953 %} 1954 ins_pipe( fpu_reg_reg ); 1955 %} 1956 1957 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1958 predicate(n->as_Vector()->length() == 16); 1959 match(Set dst (ReplicateB zero)); 1960 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 1961 ins_encode %{ 1962 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1963 %} 1964 ins_pipe( fpu_reg_reg ); 1965 %} 1966 1967 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1968 predicate(n->as_Vector()->length() == 32); 1969 match(Set dst (ReplicateB zero)); 1970 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 1971 ins_encode %{ 1972 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 1973 bool vector256 = true; 1974 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1975 %} 1976 ins_pipe( fpu_reg_reg ); 1977 %} 1978 1979 // Replicate char/short (2 byte) scalar to be vector 1980 instruct Repl2S(vecS dst, rRegI src) %{ 1981 predicate(n->as_Vector()->length() == 2); 1982 match(Set dst (ReplicateS src)); 1983 format %{ "movd $dst,$src\n\t" 1984 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1985 ins_encode %{ 1986 __ movdl($dst$$XMMRegister, $src$$Register); 1987 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1988 %} 1989 ins_pipe( fpu_reg_reg ); 1990 %} 1991 1992 instruct Repl4S(vecD dst, rRegI src) %{ 1993 predicate(n->as_Vector()->length() == 4); 1994 match(Set dst (ReplicateS src)); 1995 format %{ "movd $dst,$src\n\t" 1996 "pshuflw $dst,$dst,0x00\t! 
replicate4S" %} 1997 ins_encode %{ 1998 __ movdl($dst$$XMMRegister, $src$$Register); 1999 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2000 %} 2001 ins_pipe( fpu_reg_reg ); 2002 %} 2003 2004 instruct Repl8S(vecX dst, rRegI src) %{ 2005 predicate(n->as_Vector()->length() == 8); 2006 match(Set dst (ReplicateS src)); 2007 format %{ "movd $dst,$src\n\t" 2008 "pshuflw $dst,$dst,0x00\n\t" 2009 "punpcklqdq $dst,$dst\t! replicate8S" %} 2010 ins_encode %{ 2011 __ movdl($dst$$XMMRegister, $src$$Register); 2012 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2013 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2014 %} 2015 ins_pipe( pipe_slow ); 2016 %} 2017 2018 instruct Repl16S(vecY dst, rRegI src) %{ 2019 predicate(n->as_Vector()->length() == 16); 2020 match(Set dst (ReplicateS src)); 2021 format %{ "movd $dst,$src\n\t" 2022 "pshuflw $dst,$dst,0x00\n\t" 2023 "punpcklqdq $dst,$dst\n\t" 2024 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 2025 ins_encode %{ 2026 __ movdl($dst$$XMMRegister, $src$$Register); 2027 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2028 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2029 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2030 %} 2031 ins_pipe( pipe_slow ); 2032 %} 2033 2034 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 2035 instruct Repl2S_imm(vecS dst, immI con) %{ 2036 predicate(n->as_Vector()->length() == 2); 2037 match(Set dst (ReplicateS con)); 2038 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 2039 ins_encode %{ 2040 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 2041 %} 2042 ins_pipe( fpu_reg_reg ); 2043 %} 2044 2045 instruct Repl4S_imm(vecD dst, immI con) %{ 2046 predicate(n->as_Vector()->length() == 4); 2047 match(Set dst (ReplicateS con)); 2048 format %{ "movq $dst,[$constantaddress]\t! 
replicate4S($con)" %} 2049 ins_encode %{ 2050 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2051 %} 2052 ins_pipe( fpu_reg_reg ); 2053 %} 2054 2055 instruct Repl8S_imm(vecX dst, immI con) %{ 2056 predicate(n->as_Vector()->length() == 8); 2057 match(Set dst (ReplicateS con)); 2058 format %{ "movq $dst,[$constantaddress]\n\t" 2059 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 2060 ins_encode %{ 2061 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2062 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2063 %} 2064 ins_pipe( pipe_slow ); 2065 %} 2066 2067 instruct Repl16S_imm(vecY dst, immI con) %{ 2068 predicate(n->as_Vector()->length() == 16); 2069 match(Set dst (ReplicateS con)); 2070 format %{ "movq $dst,[$constantaddress]\n\t" 2071 "punpcklqdq $dst,$dst\n\t" 2072 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} 2073 ins_encode %{ 2074 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2075 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2076 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2077 %} 2078 ins_pipe( pipe_slow ); 2079 %} 2080 2081 // Replicate char/short (2 byte) scalar zero to be vector 2082 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 2083 predicate(n->as_Vector()->length() == 2); 2084 match(Set dst (ReplicateS zero)); 2085 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 2086 ins_encode %{ 2087 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2088 %} 2089 ins_pipe( fpu_reg_reg ); 2090 %} 2091 2092 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 2093 predicate(n->as_Vector()->length() == 4); 2094 match(Set dst (ReplicateS zero)); 2095 format %{ "pxor $dst,$dst\t! 
replicate4S zero" %} 2096 ins_encode %{ 2097 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2098 %} 2099 ins_pipe( fpu_reg_reg ); 2100 %} 2101 2102 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 2103 predicate(n->as_Vector()->length() == 8); 2104 match(Set dst (ReplicateS zero)); 2105 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 2106 ins_encode %{ 2107 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2108 %} 2109 ins_pipe( fpu_reg_reg ); 2110 %} 2111 2112 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 2113 predicate(n->as_Vector()->length() == 16); 2114 match(Set dst (ReplicateS zero)); 2115 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 2116 ins_encode %{ 2117 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2118 bool vector256 = true; 2119 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2120 %} 2121 ins_pipe( fpu_reg_reg ); 2122 %} 2123 2124 // Replicate integer (4 byte) scalar to be vector 2125 instruct Repl2I(vecD dst, rRegI src) %{ 2126 predicate(n->as_Vector()->length() == 2); 2127 match(Set dst (ReplicateI src)); 2128 format %{ "movd $dst,$src\n\t" 2129 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2130 ins_encode %{ 2131 __ movdl($dst$$XMMRegister, $src$$Register); 2132 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2133 %} 2134 ins_pipe( fpu_reg_reg ); 2135 %} 2136 2137 instruct Repl4I(vecX dst, rRegI src) %{ 2138 predicate(n->as_Vector()->length() == 4); 2139 match(Set dst (ReplicateI src)); 2140 format %{ "movd $dst,$src\n\t" 2141 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2142 ins_encode %{ 2143 __ movdl($dst$$XMMRegister, $src$$Register); 2144 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2145 %} 2146 ins_pipe( pipe_slow ); 2147 %} 2148 2149 instruct Repl8I(vecY dst, rRegI src) %{ 2150 predicate(n->as_Vector()->length() == 8); 2151 match(Set dst (ReplicateI src)); 2152 format %{ "movd $dst,$src\n\t" 2153 "pshufd $dst,$dst,0x00\n\t" 2154 "vinserti128h $dst,$dst,$dst\t! 
replicate8I" %} 2155 ins_encode %{ 2156 __ movdl($dst$$XMMRegister, $src$$Register); 2157 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2158 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2159 %} 2160 ins_pipe( pipe_slow ); 2161 %} 2162 2163 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2164 instruct Repl2I_imm(vecD dst, immI con) %{ 2165 predicate(n->as_Vector()->length() == 2); 2166 match(Set dst (ReplicateI con)); 2167 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 2168 ins_encode %{ 2169 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2170 %} 2171 ins_pipe( fpu_reg_reg ); 2172 %} 2173 2174 instruct Repl4I_imm(vecX dst, immI con) %{ 2175 predicate(n->as_Vector()->length() == 4); 2176 match(Set dst (ReplicateI con)); 2177 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2178 "punpcklqdq $dst,$dst" %} 2179 ins_encode %{ 2180 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2181 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2182 %} 2183 ins_pipe( pipe_slow ); 2184 %} 2185 2186 instruct Repl8I_imm(vecY dst, immI con) %{ 2187 predicate(n->as_Vector()->length() == 8); 2188 match(Set dst (ReplicateI con)); 2189 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2190 "punpcklqdq $dst,$dst\n\t" 2191 "vinserti128h $dst,$dst,$dst" %} 2192 ins_encode %{ 2193 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2194 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2195 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2196 %} 2197 ins_pipe( pipe_slow ); 2198 %} 2199 2200 // Integer could be loaded into xmm register directly from memory. 
2201 instruct Repl2I_mem(vecD dst, memory mem) %{ 2202 predicate(n->as_Vector()->length() == 2); 2203 match(Set dst (ReplicateI (LoadI mem))); 2204 format %{ "movd $dst,$mem\n\t" 2205 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2206 ins_encode %{ 2207 __ movdl($dst$$XMMRegister, $mem$$Address); 2208 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2209 %} 2210 ins_pipe( fpu_reg_reg ); 2211 %} 2212 2213 instruct Repl4I_mem(vecX dst, memory mem) %{ 2214 predicate(n->as_Vector()->length() == 4); 2215 match(Set dst (ReplicateI (LoadI mem))); 2216 format %{ "movd $dst,$mem\n\t" 2217 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2218 ins_encode %{ 2219 __ movdl($dst$$XMMRegister, $mem$$Address); 2220 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2221 %} 2222 ins_pipe( pipe_slow ); 2223 %} 2224 2225 instruct Repl8I_mem(vecY dst, memory mem) %{ 2226 predicate(n->as_Vector()->length() == 8); 2227 match(Set dst (ReplicateI (LoadI mem))); 2228 format %{ "movd $dst,$mem\n\t" 2229 "pshufd $dst,$dst,0x00\n\t" 2230 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2231 ins_encode %{ 2232 __ movdl($dst$$XMMRegister, $mem$$Address); 2233 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2234 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2235 %} 2236 ins_pipe( pipe_slow ); 2237 %} 2238 2239 // Replicate integer (4 byte) scalar zero to be vector 2240 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 2241 predicate(n->as_Vector()->length() == 2); 2242 match(Set dst (ReplicateI zero)); 2243 format %{ "pxor $dst,$dst\t! replicate2I" %} 2244 ins_encode %{ 2245 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2246 %} 2247 ins_pipe( fpu_reg_reg ); 2248 %} 2249 2250 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 2251 predicate(n->as_Vector()->length() == 4); 2252 match(Set dst (ReplicateI zero)); 2253 format %{ "pxor $dst,$dst\t! 
replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    // Move the 64-bit GPR into the low qword, then duplicate it.
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    // Build the 2-long pattern in the low lane, then mirror it into the
    // high 128-bit lane of the YMM register.
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// 32-bit VM: the long arrives split across a register pair (lo/hi), so the
// two halves are moved separately and interleaved before duplication.
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t!
replicate2L"%} 2308 ins_encode %{ 2309 __ movdl($dst$$XMMRegister, $src$$Register); 2310 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2311 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2312 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2313 %} 2314 ins_pipe( pipe_slow ); 2315 %} 2316 2317 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2318 predicate(n->as_Vector()->length() == 4); 2319 match(Set dst (ReplicateL src)); 2320 effect(TEMP dst, USE src, TEMP tmp); 2321 format %{ "movdl $dst,$src.lo\n\t" 2322 "movdl $tmp,$src.hi\n\t" 2323 "punpckldq $dst,$tmp\n\t" 2324 "punpcklqdq $dst,$dst\n\t" 2325 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2326 ins_encode %{ 2327 __ movdl($dst$$XMMRegister, $src$$Register); 2328 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2329 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2330 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2331 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2332 %} 2333 ins_pipe( pipe_slow ); 2334 %} 2335 #endif // _LP64 2336 2337 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2338 instruct Repl2L_imm(vecX dst, immL con) %{ 2339 predicate(n->as_Vector()->length() == 2); 2340 match(Set dst (ReplicateL con)); 2341 format %{ "movq $dst,[$constantaddress]\n\t" 2342 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 2343 ins_encode %{ 2344 __ movq($dst$$XMMRegister, $constantaddress($con)); 2345 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2346 %} 2347 ins_pipe( pipe_slow ); 2348 %} 2349 2350 instruct Repl4L_imm(vecY dst, immL con) %{ 2351 predicate(n->as_Vector()->length() == 4); 2352 match(Set dst (ReplicateL con)); 2353 format %{ "movq $dst,[$constantaddress]\n\t" 2354 "punpcklqdq $dst,$dst\n\t" 2355 "vinserti128h $dst,$dst,$dst\t! 
replicate4L($con)" %} 2356 ins_encode %{ 2357 __ movq($dst$$XMMRegister, $constantaddress($con)); 2358 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2359 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2360 %} 2361 ins_pipe( pipe_slow ); 2362 %} 2363 2364 // Long could be loaded into xmm register directly from memory. 2365 instruct Repl2L_mem(vecX dst, memory mem) %{ 2366 predicate(n->as_Vector()->length() == 2); 2367 match(Set dst (ReplicateL (LoadL mem))); 2368 format %{ "movq $dst,$mem\n\t" 2369 "punpcklqdq $dst,$dst\t! replicate2L" %} 2370 ins_encode %{ 2371 __ movq($dst$$XMMRegister, $mem$$Address); 2372 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2373 %} 2374 ins_pipe( pipe_slow ); 2375 %} 2376 2377 instruct Repl4L_mem(vecY dst, memory mem) %{ 2378 predicate(n->as_Vector()->length() == 4); 2379 match(Set dst (ReplicateL (LoadL mem))); 2380 format %{ "movq $dst,$mem\n\t" 2381 "punpcklqdq $dst,$dst\n\t" 2382 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2383 ins_encode %{ 2384 __ movq($dst$$XMMRegister, $mem$$Address); 2385 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2386 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2387 %} 2388 ins_pipe( pipe_slow ); 2389 %} 2390 2391 // Replicate long (8 byte) scalar zero to be vector 2392 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 2393 predicate(n->as_Vector()->length() == 2); 2394 match(Set dst (ReplicateL zero)); 2395 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 2396 ins_encode %{ 2397 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2398 %} 2399 ins_pipe( fpu_reg_reg ); 2400 %} 2401 2402 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 2403 predicate(n->as_Vector()->length() == 4); 2404 match(Set dst (ReplicateL zero)); 2405 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 2406 ins_encode %{ 2407 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
// Fixed format text for Repl2F/Repl4F: was "pshufd $dst,$dst,0x00", but the
// encoding reads $src ("__ pshufd($dst, $src, 0x00)"), so the printed assembly
// misstated the source operand; Repl8F below already prints it correctly.
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    // Broadcast the float in the low lane, then copy it into the high lane.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t!
replicate4F zero" %} 2462 ins_encode %{ 2463 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2464 %} 2465 ins_pipe( fpu_reg_reg ); 2466 %} 2467 2468 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 2469 predicate(n->as_Vector()->length() == 8); 2470 match(Set dst (ReplicateF zero)); 2471 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 2472 ins_encode %{ 2473 bool vector256 = true; 2474 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2475 %} 2476 ins_pipe( fpu_reg_reg ); 2477 %} 2478 2479 // Replicate double (8 bytes) scalar to be vector 2480 instruct Repl2D(vecX dst, regD src) %{ 2481 predicate(n->as_Vector()->length() == 2); 2482 match(Set dst (ReplicateD src)); 2483 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 2484 ins_encode %{ 2485 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2486 %} 2487 ins_pipe( pipe_slow ); 2488 %} 2489 2490 instruct Repl4D(vecY dst, regD src) %{ 2491 predicate(n->as_Vector()->length() == 4); 2492 match(Set dst (ReplicateD src)); 2493 format %{ "pshufd $dst,$src,0x44\n\t" 2494 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 2495 ins_encode %{ 2496 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2497 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2498 %} 2499 ins_pipe( pipe_slow ); 2500 %} 2501 2502 // Replicate double (8 byte) scalar zero to be vector 2503 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 2504 predicate(n->as_Vector()->length() == 2); 2505 match(Set dst (ReplicateD zero)); 2506 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 2507 ins_encode %{ 2508 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 2509 %} 2510 ins_pipe( fpu_reg_reg ); 2511 %} 2512 2513 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 2514 predicate(n->as_Vector()->length() == 4); 2515 match(Set dst (ReplicateD zero)); 2516 format %{ "vxorpd $dst,$dst,$dst,vect256\t! 
replicate4D zero" %} 2517 ins_encode %{ 2518 bool vector256 = true; 2519 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2520 %} 2521 ins_pipe( fpu_reg_reg ); 2522 %} 2523 2524 // ====================VECTOR ARITHMETIC======================================= 2525 2526 // --------------------------------- ADD -------------------------------------- 2527 2528 // Bytes vector add 2529 instruct vadd4B(vecS dst, vecS src) %{ 2530 predicate(n->as_Vector()->length() == 4); 2531 match(Set dst (AddVB dst src)); 2532 format %{ "paddb $dst,$src\t! add packed4B" %} 2533 ins_encode %{ 2534 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2535 %} 2536 ins_pipe( pipe_slow ); 2537 %} 2538 2539 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 2540 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2541 match(Set dst (AddVB src1 src2)); 2542 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 2543 ins_encode %{ 2544 bool vector256 = false; 2545 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2546 %} 2547 ins_pipe( pipe_slow ); 2548 %} 2549 2550 instruct vadd8B(vecD dst, vecD src) %{ 2551 predicate(n->as_Vector()->length() == 8); 2552 match(Set dst (AddVB dst src)); 2553 format %{ "paddb $dst,$src\t! add packed8B" %} 2554 ins_encode %{ 2555 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2556 %} 2557 ins_pipe( pipe_slow ); 2558 %} 2559 2560 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 2561 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2562 match(Set dst (AddVB src1 src2)); 2563 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 2564 ins_encode %{ 2565 bool vector256 = false; 2566 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2567 %} 2568 ins_pipe( pipe_slow ); 2569 %} 2570 2571 instruct vadd16B(vecX dst, vecX src) %{ 2572 predicate(n->as_Vector()->length() == 16); 2573 match(Set dst (AddVB dst src)); 2574 format %{ "paddb $dst,$src\t! 
add packed16B" %} 2575 ins_encode %{ 2576 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2577 %} 2578 ins_pipe( pipe_slow ); 2579 %} 2580 2581 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 2582 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2583 match(Set dst (AddVB src1 src2)); 2584 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 2585 ins_encode %{ 2586 bool vector256 = false; 2587 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2588 %} 2589 ins_pipe( pipe_slow ); 2590 %} 2591 2592 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 2593 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2594 match(Set dst (AddVB src (LoadVector mem))); 2595 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 2596 ins_encode %{ 2597 bool vector256 = false; 2598 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2599 %} 2600 ins_pipe( pipe_slow ); 2601 %} 2602 2603 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 2604 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2605 match(Set dst (AddVB src1 src2)); 2606 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 2607 ins_encode %{ 2608 bool vector256 = true; 2609 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2610 %} 2611 ins_pipe( pipe_slow ); 2612 %} 2613 2614 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 2615 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2616 match(Set dst (AddVB src (LoadVector mem))); 2617 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 2618 ins_encode %{ 2619 bool vector256 = true; 2620 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2621 %} 2622 ins_pipe( pipe_slow ); 2623 %} 2624 2625 // Shorts/Chars vector add 2626 instruct vadd2S(vecS dst, vecS src) %{ 2627 predicate(n->as_Vector()->length() == 2); 2628 match(Set dst (AddVS dst src)); 2629 format %{ "paddw $dst,$src\t! 
add packed2S" %} 2630 ins_encode %{ 2631 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2632 %} 2633 ins_pipe( pipe_slow ); 2634 %} 2635 2636 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 2637 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2638 match(Set dst (AddVS src1 src2)); 2639 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 2640 ins_encode %{ 2641 bool vector256 = false; 2642 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2643 %} 2644 ins_pipe( pipe_slow ); 2645 %} 2646 2647 instruct vadd4S(vecD dst, vecD src) %{ 2648 predicate(n->as_Vector()->length() == 4); 2649 match(Set dst (AddVS dst src)); 2650 format %{ "paddw $dst,$src\t! add packed4S" %} 2651 ins_encode %{ 2652 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2653 %} 2654 ins_pipe( pipe_slow ); 2655 %} 2656 2657 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 2658 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2659 match(Set dst (AddVS src1 src2)); 2660 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 2661 ins_encode %{ 2662 bool vector256 = false; 2663 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2664 %} 2665 ins_pipe( pipe_slow ); 2666 %} 2667 2668 instruct vadd8S(vecX dst, vecX src) %{ 2669 predicate(n->as_Vector()->length() == 8); 2670 match(Set dst (AddVS dst src)); 2671 format %{ "paddw $dst,$src\t! add packed8S" %} 2672 ins_encode %{ 2673 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2674 %} 2675 ins_pipe( pipe_slow ); 2676 %} 2677 2678 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 2679 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2680 match(Set dst (AddVS src1 src2)); 2681 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): in every AVX instruct below, the local `vector256` flag picks
// the VEX.256 (true) vs VEX.128 (false) encoding passed to the assembler.
// Predicates gate on the AVX level (UseAVX) and on the vector length of the
// matched ideal node; 256-bit integer forms require UseAVX > 1 (AVX2).

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add

// Non-AVX form: two-operand SSE paddd, dst is also the first source.
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
// NOTE(review): float/double forms need only UseAVX > 0 even at 256 bits,
// since FP ops are part of AVX1 (integer 256-bit ops need AVX2 above).
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
// pmulld is an SSE4.1 instruction, hence the UseSSE > 3 guard on the
// non-AVX forms below.
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %} 3808 ins_encode %{ 3809 bool vector256 = true; 3810 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3811 %} 3812 ins_pipe( pipe_slow ); 3813 %} 3814 3815 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 3816 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3817 match(Set dst (DivVF src (LoadVector mem))); 3818 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 3819 ins_encode %{ 3820 bool vector256 = true; 3821 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3822 %} 3823 ins_pipe( pipe_slow ); 3824 %} 3825 3826 // Doubles vector div 3827 instruct vdiv2D(vecX dst, vecX src) %{ 3828 predicate(n->as_Vector()->length() == 2); 3829 match(Set dst (DivVD dst src)); 3830 format %{ "divpd $dst,$src\t! div packed2D" %} 3831 ins_encode %{ 3832 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 3833 %} 3834 ins_pipe( pipe_slow ); 3835 %} 3836 3837 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 3838 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3839 match(Set dst (DivVD src1 src2)); 3840 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 3841 ins_encode %{ 3842 bool vector256 = false; 3843 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3844 %} 3845 ins_pipe( pipe_slow ); 3846 %} 3847 3848 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 3849 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3850 match(Set dst (DivVD src (LoadVector mem))); 3851 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 3852 ins_encode %{ 3853 bool vector256 = false; 3854 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3855 %} 3856 ins_pipe( pipe_slow ); 3857 %} 3858 3859 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 3860 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3861 match(Set dst (DivVD src1 src2)); 3862 format %{ "vdivpd $dst,$src1,$src2\t! 
div packed4D" %} 3863 ins_encode %{ 3864 bool vector256 = true; 3865 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3866 %} 3867 ins_pipe( pipe_slow ); 3868 %} 3869 3870 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 3871 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3872 match(Set dst (DivVD src (LoadVector mem))); 3873 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 3874 ins_encode %{ 3875 bool vector256 = true; 3876 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3877 %} 3878 ins_pipe( pipe_slow ); 3879 %} 3880 3881 // ------------------------------ Shift --------------------------------------- 3882 3883 // Left and right shift count vectors are the same on x86 3884 // (only lowest bits of xmm reg are used for count). 3885 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 3886 match(Set dst (LShiftCntV cnt)); 3887 match(Set dst (RShiftCntV cnt)); 3888 format %{ "movd $dst,$cnt\t! load shift count" %} 3889 ins_encode %{ 3890 __ movdl($dst$$XMMRegister, $cnt$$Register); 3891 %} 3892 ins_pipe( pipe_slow ); 3893 %} 3894 3895 // ------------------------------ LeftShift ----------------------------------- 3896 3897 // Shorts/Chars vector left shift 3898 instruct vsll2S(vecS dst, vecS shift) %{ 3899 predicate(n->as_Vector()->length() == 2); 3900 match(Set dst (LShiftVS dst shift)); 3901 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 3902 ins_encode %{ 3903 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3904 %} 3905 ins_pipe( pipe_slow ); 3906 %} 3907 3908 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 3909 predicate(n->as_Vector()->length() == 2); 3910 match(Set dst (LShiftVS dst shift)); 3911 format %{ "psllw $dst,$shift\t! 
left shift packed2S" %} 3912 ins_encode %{ 3913 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3914 %} 3915 ins_pipe( pipe_slow ); 3916 %} 3917 3918 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 3919 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3920 match(Set dst (LShiftVS src shift)); 3921 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 3922 ins_encode %{ 3923 bool vector256 = false; 3924 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3925 %} 3926 ins_pipe( pipe_slow ); 3927 %} 3928 3929 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 3930 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3931 match(Set dst (LShiftVS src shift)); 3932 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 3933 ins_encode %{ 3934 bool vector256 = false; 3935 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3936 %} 3937 ins_pipe( pipe_slow ); 3938 %} 3939 3940 instruct vsll4S(vecD dst, vecS shift) %{ 3941 predicate(n->as_Vector()->length() == 4); 3942 match(Set dst (LShiftVS dst shift)); 3943 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 3944 ins_encode %{ 3945 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3946 %} 3947 ins_pipe( pipe_slow ); 3948 %} 3949 3950 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 3951 predicate(n->as_Vector()->length() == 4); 3952 match(Set dst (LShiftVS dst shift)); 3953 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 3954 ins_encode %{ 3955 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3956 %} 3957 ins_pipe( pipe_slow ); 3958 %} 3959 3960 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 3961 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3962 match(Set dst (LShiftVS src shift)); 3963 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 3964 ins_encode %{ 3965 bool vector256 = false; 3966 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3967 %} 3968 ins_pipe( pipe_slow ); 3969 %} 3970 3971 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 3972 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3973 match(Set dst (LShiftVS src shift)); 3974 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 3975 ins_encode %{ 3976 bool vector256 = false; 3977 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3978 %} 3979 ins_pipe( pipe_slow ); 3980 %} 3981 3982 instruct vsll8S(vecX dst, vecS shift) %{ 3983 predicate(n->as_Vector()->length() == 8); 3984 match(Set dst (LShiftVS dst shift)); 3985 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 3986 ins_encode %{ 3987 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3988 %} 3989 ins_pipe( pipe_slow ); 3990 %} 3991 3992 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 3993 predicate(n->as_Vector()->length() == 8); 3994 match(Set dst (LShiftVS dst shift)); 3995 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 3996 ins_encode %{ 3997 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3998 %} 3999 ins_pipe( pipe_slow ); 4000 %} 4001 4002 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 4003 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4004 match(Set dst (LShiftVS src shift)); 4005 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 4006 ins_encode %{ 4007 bool vector256 = false; 4008 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4009 %} 4010 ins_pipe( pipe_slow ); 4011 %} 4012 4013 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4014 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4015 match(Set dst (LShiftVS src shift)); 4016 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 4017 ins_encode %{ 4018 bool vector256 = false; 4019 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4020 %} 4021 ins_pipe( pipe_slow ); 4022 %} 4023 4024 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 4025 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4026 match(Set dst (LShiftVS src shift)); 4027 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4028 ins_encode %{ 4029 bool vector256 = true; 4030 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4031 %} 4032 ins_pipe( pipe_slow ); 4033 %} 4034 4035 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4036 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4037 match(Set dst (LShiftVS src shift)); 4038 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4039 ins_encode %{ 4040 bool vector256 = true; 4041 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4042 %} 4043 ins_pipe( pipe_slow ); 4044 %} 4045 4046 // Integers vector left shift 4047 instruct vsll2I(vecD dst, vecS shift) %{ 4048 predicate(n->as_Vector()->length() == 2); 4049 match(Set dst (LShiftVI dst shift)); 4050 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4051 ins_encode %{ 4052 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4053 %} 4054 ins_pipe( pipe_slow ); 4055 %} 4056 4057 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 4058 predicate(n->as_Vector()->length() == 2); 4059 match(Set dst (LShiftVI dst shift)); 4060 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4061 ins_encode %{ 4062 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4063 %} 4064 ins_pipe( pipe_slow ); 4065 %} 4066 4067 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 4068 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4069 match(Set dst (LShiftVI src shift)); 4070 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 4071 ins_encode %{ 4072 bool vector256 = false; 4073 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4074 %} 4075 ins_pipe( pipe_slow ); 4076 %} 4077 4078 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4079 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4080 match(Set dst (LShiftVI src shift)); 4081 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 4082 ins_encode %{ 4083 bool vector256 = false; 4084 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4085 %} 4086 ins_pipe( pipe_slow ); 4087 %} 4088 4089 instruct vsll4I(vecX dst, vecS shift) %{ 4090 predicate(n->as_Vector()->length() == 4); 4091 match(Set dst (LShiftVI dst shift)); 4092 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4093 ins_encode %{ 4094 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4095 %} 4096 ins_pipe( pipe_slow ); 4097 %} 4098 4099 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 4100 predicate(n->as_Vector()->length() == 4); 4101 match(Set dst (LShiftVI dst shift)); 4102 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4103 ins_encode %{ 4104 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4105 %} 4106 ins_pipe( pipe_slow ); 4107 %} 4108 4109 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 4110 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4111 match(Set dst (LShiftVI src shift)); 4112 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 4113 ins_encode %{ 4114 bool vector256 = false; 4115 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4116 %} 4117 ins_pipe( pipe_slow ); 4118 %} 4119 4120 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4121 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4122 match(Set dst (LShiftVI src shift)); 4123 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 4124 ins_encode %{ 4125 bool vector256 = false; 4126 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4127 %} 4128 ins_pipe( pipe_slow ); 4129 %} 4130 4131 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 4132 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4133 match(Set dst (LShiftVI src shift)); 4134 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4135 ins_encode %{ 4136 bool vector256 = true; 4137 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4138 %} 4139 ins_pipe( pipe_slow ); 4140 %} 4141 4142 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4143 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4144 match(Set dst (LShiftVI src shift)); 4145 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4146 ins_encode %{ 4147 bool vector256 = true; 4148 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4149 %} 4150 ins_pipe( pipe_slow ); 4151 %} 4152 4153 // Longs vector left shift 4154 instruct vsll2L(vecX dst, vecS shift) %{ 4155 predicate(n->as_Vector()->length() == 2); 4156 match(Set dst (LShiftVL dst shift)); 4157 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4158 ins_encode %{ 4159 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 4160 %} 4161 ins_pipe( pipe_slow ); 4162 %} 4163 4164 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 4165 predicate(n->as_Vector()->length() == 2); 4166 match(Set dst (LShiftVL dst shift)); 4167 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4168 ins_encode %{ 4169 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 4170 %} 4171 ins_pipe( pipe_slow ); 4172 %} 4173 4174 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 4175 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4176 match(Set dst (LShiftVL src shift)); 4177 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed2L" %} 4178 ins_encode %{ 4179 bool vector256 = false; 4180 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4181 %} 4182 ins_pipe( pipe_slow ); 4183 %} 4184 4185 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4186 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4187 match(Set dst (LShiftVL src shift)); 4188 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 4189 ins_encode %{ 4190 bool vector256 = false; 4191 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4192 %} 4193 ins_pipe( pipe_slow ); 4194 %} 4195 4196 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 4197 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4198 match(Set dst (LShiftVL src shift)); 4199 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4200 ins_encode %{ 4201 bool vector256 = true; 4202 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4203 %} 4204 ins_pipe( pipe_slow ); 4205 %} 4206 4207 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4208 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4209 match(Set dst (LShiftVL src shift)); 4210 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4211 ins_encode %{ 4212 bool vector256 = true; 4213 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4214 %} 4215 ins_pipe( pipe_slow ); 4216 %} 4217 4218 // ----------------------- LogicalRightShift ----------------------------------- 4219 4220 // Shorts vector logical right shift produces incorrect Java result 4221 // for negative data because java code convert short value into int with 4222 // sign extension before a shift. But char vectors are fine since chars are 4223 // unsigned values. 4224 4225 instruct vsrl2S(vecS dst, vecS shift) %{ 4226 predicate(n->as_Vector()->length() == 2); 4227 match(Set dst (URShiftVS dst shift)); 4228 format %{ "psrlw $dst,$shift\t! 
logical right shift packed2S" %} 4229 ins_encode %{ 4230 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4231 %} 4232 ins_pipe( pipe_slow ); 4233 %} 4234 4235 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 4236 predicate(n->as_Vector()->length() == 2); 4237 match(Set dst (URShiftVS dst shift)); 4238 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 4239 ins_encode %{ 4240 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4241 %} 4242 ins_pipe( pipe_slow ); 4243 %} 4244 4245 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 4246 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4247 match(Set dst (URShiftVS src shift)); 4248 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4249 ins_encode %{ 4250 bool vector256 = false; 4251 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4252 %} 4253 ins_pipe( pipe_slow ); 4254 %} 4255 4256 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4257 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4258 match(Set dst (URShiftVS src shift)); 4259 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4260 ins_encode %{ 4261 bool vector256 = false; 4262 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4263 %} 4264 ins_pipe( pipe_slow ); 4265 %} 4266 4267 instruct vsrl4S(vecD dst, vecS shift) %{ 4268 predicate(n->as_Vector()->length() == 4); 4269 match(Set dst (URShiftVS dst shift)); 4270 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 4271 ins_encode %{ 4272 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4273 %} 4274 ins_pipe( pipe_slow ); 4275 %} 4276 4277 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 4278 predicate(n->as_Vector()->length() == 4); 4279 match(Set dst (URShiftVS dst shift)); 4280 format %{ "psrlw $dst,$shift\t! 
logical right shift packed4S" %} 4281 ins_encode %{ 4282 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4283 %} 4284 ins_pipe( pipe_slow ); 4285 %} 4286 4287 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 4288 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4289 match(Set dst (URShiftVS src shift)); 4290 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4291 ins_encode %{ 4292 bool vector256 = false; 4293 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4294 %} 4295 ins_pipe( pipe_slow ); 4296 %} 4297 4298 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4299 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4300 match(Set dst (URShiftVS src shift)); 4301 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4302 ins_encode %{ 4303 bool vector256 = false; 4304 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4305 %} 4306 ins_pipe( pipe_slow ); 4307 %} 4308 4309 instruct vsrl8S(vecX dst, vecS shift) %{ 4310 predicate(n->as_Vector()->length() == 8); 4311 match(Set dst (URShiftVS dst shift)); 4312 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4313 ins_encode %{ 4314 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4315 %} 4316 ins_pipe( pipe_slow ); 4317 %} 4318 4319 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 4320 predicate(n->as_Vector()->length() == 8); 4321 match(Set dst (URShiftVS dst shift)); 4322 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4323 ins_encode %{ 4324 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4325 %} 4326 ins_pipe( pipe_slow ); 4327 %} 4328 4329 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 4330 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4331 match(Set dst (URShiftVS src shift)); 4332 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 4333 ins_encode %{ 4334 bool vector256 = false; 4335 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4336 %} 4337 ins_pipe( pipe_slow ); 4338 %} 4339 4340 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4341 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4342 match(Set dst (URShiftVS src shift)); 4343 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 4344 ins_encode %{ 4345 bool vector256 = false; 4346 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4347 %} 4348 ins_pipe( pipe_slow ); 4349 %} 4350 4351 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 4352 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4353 match(Set dst (URShiftVS src shift)); 4354 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4355 ins_encode %{ 4356 bool vector256 = true; 4357 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4358 %} 4359 ins_pipe( pipe_slow ); 4360 %} 4361 4362 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4363 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4364 match(Set dst (URShiftVS src shift)); 4365 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4366 ins_encode %{ 4367 bool vector256 = true; 4368 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4369 %} 4370 ins_pipe( pipe_slow ); 4371 %} 4372 4373 // Integers vector logical right shift 4374 instruct vsrl2I(vecD dst, vecS shift) %{ 4375 predicate(n->as_Vector()->length() == 2); 4376 match(Set dst (URShiftVI dst shift)); 4377 format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %} 4378 ins_encode %{ 4379 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4380 %} 4381 ins_pipe( pipe_slow ); 4382 %} 4383 4384 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 4385 predicate(n->as_Vector()->length() == 2); 4386 match(Set dst (URShiftVI dst shift)); 4387 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4388 ins_encode %{ 4389 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4390 %} 4391 ins_pipe( pipe_slow ); 4392 %} 4393 4394 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 4395 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4396 match(Set dst (URShiftVI src shift)); 4397 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4398 ins_encode %{ 4399 bool vector256 = false; 4400 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4401 %} 4402 ins_pipe( pipe_slow ); 4403 %} 4404 4405 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4406 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4407 match(Set dst (URShiftVI src shift)); 4408 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4409 ins_encode %{ 4410 bool vector256 = false; 4411 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4412 %} 4413 ins_pipe( pipe_slow ); 4414 %} 4415 4416 instruct vsrl4I(vecX dst, vecS shift) %{ 4417 predicate(n->as_Vector()->length() == 4); 4418 match(Set dst (URShiftVI dst shift)); 4419 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 4420 ins_encode %{ 4421 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4422 %} 4423 ins_pipe( pipe_slow ); 4424 %} 4425 4426 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 4427 predicate(n->as_Vector()->length() == 4); 4428 match(Set dst (URShiftVI dst shift)); 4429 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 4430 ins_encode %{ 4431 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4432 %} 4433 ins_pipe( pipe_slow ); 4434 %} 4435 4436 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 4437 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4438 match(Set dst (URShiftVI src shift)); 4439 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 4440 ins_encode %{ 4441 bool vector256 = false; 4442 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4443 %} 4444 ins_pipe( pipe_slow ); 4445 %} 4446 4447 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4448 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4449 match(Set dst (URShiftVI src shift)); 4450 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 4451 ins_encode %{ 4452 bool vector256 = false; 4453 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4454 %} 4455 ins_pipe( pipe_slow ); 4456 %} 4457 4458 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 4459 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4460 match(Set dst (URShiftVI src shift)); 4461 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 4462 ins_encode %{ 4463 bool vector256 = true; 4464 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4465 %} 4466 ins_pipe( pipe_slow ); 4467 %} 4468 4469 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4470 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4471 match(Set dst (URShiftVI src shift)); 4472 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed8I" %} 4473 ins_encode %{ 4474 bool vector256 = true; 4475 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4476 %} 4477 ins_pipe( pipe_slow ); 4478 %} 4479 4480 // Longs vector logical right shift 4481 instruct vsrl2L(vecX dst, vecS shift) %{ 4482 predicate(n->as_Vector()->length() == 2); 4483 match(Set dst (URShiftVL dst shift)); 4484 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 4485 ins_encode %{ 4486 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 4487 %} 4488 ins_pipe( pipe_slow ); 4489 %} 4490 4491 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 4492 predicate(n->as_Vector()->length() == 2); 4493 match(Set dst (URShiftVL dst shift)); 4494 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 4495 ins_encode %{ 4496 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 4497 %} 4498 ins_pipe( pipe_slow ); 4499 %} 4500 4501 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 4502 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4503 match(Set dst (URShiftVL src shift)); 4504 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 4505 ins_encode %{ 4506 bool vector256 = false; 4507 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4508 %} 4509 ins_pipe( pipe_slow ); 4510 %} 4511 4512 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4513 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4514 match(Set dst (URShiftVL src shift)); 4515 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 4516 ins_encode %{ 4517 bool vector256 = false; 4518 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4519 %} 4520 ins_pipe( pipe_slow ); 4521 %} 4522 4523 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 4524 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4525 match(Set dst (URShiftVL src shift)); 4526 format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 (UseAVX > 1) 256-bit: 4x64-bit logical right shift, count given as an
// 8-bit immediate instead of an XMM register.
instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------
//
// Naming scheme for the rules below: vsra<N><T> = "vector shift right
// arithmetic", N lanes of type T (S = short/char, I = int).
//  - plain forms are SSE two-operand, in-place (match tree uses dst as src);
//  - "_reg" forms are AVX three-operand, non-destructive (dst = src >> shift)
//    and gate on UseAVX; the 256-bit (vecY) forms gate on UseAVX > 1 (AVX2)
//    and pass vector256 = true to select the 256-bit VEX encoding;
//  - "_imm" forms take the shift count as an 8-bit immediate (immI8) rather
//    than in an XMM register.
// All variants are routed through pipe_slow, like the rest of the vector rules.

// Shorts/Chars vector arithmetic right shift
// SSE in-place, 2x16-bit lanes; shift count in an XMM register.
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place, 2x16-bit lanes; immediate shift count.
instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 2x16-bit lanes; shift count in an XMM register.
instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 2x16-bit lanes; immediate shift count.
instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place, 4x16-bit lanes; shift count in an XMM register.
instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place, 4x16-bit lanes; immediate shift count.
instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 4x16-bit lanes; shift count in an XMM register.
instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 4x16-bit lanes; immediate shift count.
instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place, 8x16-bit lanes; shift count in an XMM register.
instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place, 8x16-bit lanes; immediate shift count.
instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 8x16-bit lanes; shift count in an XMM register.
instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 8x16-bit lanes; immediate shift count.
instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit, 16x16-bit lanes; shift count in an XMM register.
instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit, 16x16-bit lanes; immediate shift count.
instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
// Same variant scheme as the shorts above, using psrad/vpsrad on 32-bit lanes.
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place, 2x32-bit lanes; immediate shift count.
instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 2x32-bit lanes; shift count in an XMM register.
instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 2x32-bit lanes; immediate shift count.
instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place, 4x32-bit lanes; shift count in an XMM register.
instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place, 4x32-bit lanes; immediate shift count.
instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 4x32-bit lanes; shift count in an XMM register.
instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand, 4x32-bit lanes; immediate shift count.
instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit, 8x32-bit lanes; shift count in an XMM register.
instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit, 8x32-bit lanes; immediate shift count.
instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.


// --------------------------------- AND --------------------------------------
//
// Bitwise logic is type-agnostic, so the rules below key off the vector size
// in bytes (length_in_bytes) rather than lane count.  Plain forms are SSE
// in-place; "_reg" forms are AVX three-operand; "_mem" forms fold a vector
// load (LoadVector) into the second operand.  32-byte (vecY) forms require
// AVX2 (UseAVX > 1) and set vector256 = true.

// SSE in-place AND, 4-byte vector.
instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand AND, 4-byte vector.
instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place AND, 8-byte vector.
instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand AND, 8-byte vector.
instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place AND, 16-byte vector.
instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand AND, 16-byte vector.
instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX AND with a memory operand folded in, 16-byte vector.
instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit AND, 32-byte vector.
instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit AND with a memory operand folded in, 32-byte vector.
instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------
// Same structure as the AND section, using por/vpor.

// SSE in-place OR, 4-byte vector.
instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand OR, 4-byte vector.
instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place OR, 8-byte vector.
instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand OR, 8-byte vector.
instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place OR, 16-byte vector.
instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand OR, 16-byte vector.
instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX OR with a memory operand folded in, 16-byte vector.
instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit OR, 32-byte vector.
instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit OR with a memory operand folded in, 32-byte vector.
instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------
// Same structure as the AND section, using pxor/vpxor.

// SSE in-place XOR, 4-byte vector.
instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand XOR, 4-byte vector.
instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place XOR, 8-byte vector.
instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand XOR, 8-byte vector.
instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE in-place XOR, 16-byte vector.
instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand XOR, 16-byte vector.
instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX XOR with a memory operand folded in, 16-byte vector.
instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit XOR, 32-byte vector.
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit XOR with a memory operand folded in, 32-byte vector.
instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}