1 // 2 // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // archtecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX). 68 // Linux ABI: No register preserved across function calls 69 // XMM0-XMM7 might hold parameters 70 // Windows ABI: XMM6-XMM15 preserved across function calls 71 // XMM0-XMM3 might hold parameters 72 73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 81 82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 90 91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 99 100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 108 109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 117 118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 126 127 #ifdef _WIN64 128 129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); 130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 137 138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); 139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 146 147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); 148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 155 156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); 157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 164 165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); 166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 173 174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); 175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 182 183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); 184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 191 192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); 193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 200 201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); 202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 209 210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); 211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 218 219 #else // _WIN64 220 221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 229 230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 238 239 #ifdef _LP64 240 241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 249 250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 258 259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 267 268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 276 277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 285 286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 294 295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 303 304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 312 313 #endif // _LP64 314 315 #endif // _WIN64 316 317 #ifdef _LP64 318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 319 #else 320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 321 #endif // _LP64 322 323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 331 #ifdef _LP64 332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 340 #endif 341 ); 342 343 // flags allocation class should be last. 344 alloc_class chunk2(RFLAGS); 345 346 // Singleton class for condition codes 347 reg_class int_flags(RFLAGS); 348 349 // Class for all float registers 350 reg_class float_reg(XMM0, 351 XMM1, 352 XMM2, 353 XMM3, 354 XMM4, 355 XMM5, 356 XMM6, 357 XMM7 358 #ifdef _LP64 359 ,XMM8, 360 XMM9, 361 XMM10, 362 XMM11, 363 XMM12, 364 XMM13, 365 XMM14, 366 XMM15 367 #endif 368 ); 369 370 // Class for all double registers 371 reg_class double_reg(XMM0, XMM0b, 372 XMM1, XMM1b, 373 XMM2, XMM2b, 374 XMM3, XMM3b, 375 XMM4, XMM4b, 376 XMM5, XMM5b, 377 XMM6, XMM6b, 378 XMM7, XMM7b 379 #ifdef _LP64 380 ,XMM8, XMM8b, 381 XMM9, XMM9b, 382 XMM10, XMM10b, 383 XMM11, XMM11b, 384 XMM12, XMM12b, 385 XMM13, XMM13b, 386 XMM14, XMM14b, 387 XMM15, XMM15b 388 #endif 389 ); 390 391 // Class for all 32bit vector registers 392 reg_class vectors_reg(XMM0, 393 XMM1, 394 XMM2, 395 XMM3, 396 XMM4, 397 XMM5, 398 XMM6, 399 XMM7 400 #ifdef _LP64 401 ,XMM8, 402 XMM9, 403 XMM10, 404 XMM11, 405 XMM12, 406 XMM13, 407 XMM14, 408 XMM15 409 #endif 410 ); 411 412 // Class for all 64bit vector registers 413 reg_class vectord_reg(XMM0, XMM0b, 414 XMM1, XMM1b, 415 XMM2, XMM2b, 416 XMM3, XMM3b, 417 XMM4, XMM4b, 418 XMM5, XMM5b, 419 XMM6, XMM6b, 420 XMM7, XMM7b 421 #ifdef _LP64 422 ,XMM8, XMM8b, 423 XMM9, XMM9b, 424 XMM10, XMM10b, 425 XMM11, XMM11b, 426 XMM12, XMM12b, 427 XMM13, XMM13b, 428 XMM14, XMM14b, 429 XMM15, XMM15b 430 #endif 431 ); 432 433 // Class for all 128bit vector registers 434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d, 435 XMM1, XMM1b, XMM1c, XMM1d, 436 XMM2, XMM2b, XMM2c, XMM2d, 437 XMM3, XMM3b, XMM3c, XMM3d, 438 XMM4, XMM4b, XMM4c, XMM4d, 439 XMM5, XMM5b, XMM5c, XMM5d, 440 XMM6, XMM6b, XMM6c, XMM6d, 441 XMM7, XMM7b, XMM7c, XMM7d 442 #ifdef _LP64 443 ,XMM8, XMM8b, XMM8c, XMM8d, 444 XMM9, XMM9b, XMM9c, XMM9d, 445 XMM10, XMM10b, XMM10c, XMM10d, 446 XMM11, XMM11b, XMM11c, XMM11d, 447 XMM12, XMM12b, XMM12c, XMM12d, 448 XMM13, XMM13b, XMM13c, XMM13d, 449 XMM14, XMM14b, XMM14c, XMM14d, 450 XMM15, XMM15b, XMM15c, XMM15d 451 #endif 452 ); 453 454 // Class for all 256bit vector registers 455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 463 #ifdef _LP64 464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 472 #endif 473 ); 474 475 %} 476 477 478 //----------SOURCE BLOCK------------------------------------------------------- 479 // This is a block of C++ code which provides values, functions, and 480 // definitions necessary in the rest of the architecture description 481 482 source_hpp %{ 483 // Header information of the source block. 484 // Method declarations/definitions which are used outside 485 // the ad-scope can conveniently be defined here. 486 // 487 // To keep related declarations/definitions/uses close together, 488 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 489 490 class NativeJump; 491 492 class CallStubImpl { 493 494 //-------------------------------------------------------------- 495 //---< Used for optimization in Compile::shorten_branches >--- 496 //-------------------------------------------------------------- 497 498 public: 499 // Size of call trampoline stub. 500 static uint size_call_trampoline() { 501 return 0; // no call trampolines on this platform 502 } 503 504 // number of relocations needed by a call trampoline stub 505 static uint reloc_call_trampoline() { 506 return 0; // no call trampolines on this platform 507 } 508 }; 509 510 class HandlerImpl { 511 512 public: 513 514 static int emit_exception_handler(CodeBuffer &cbuf); 515 static int emit_deopt_handler(CodeBuffer& cbuf); 516 517 static uint size_exception_handler() { 518 // NativeCall instruction size is the same as NativeJump. 519 // exception handler starts out as jump and can be patched to 520 // a call be deoptimization. (4932387) 521 // Note that this value is also credited (in output.cpp) to 522 // the size of the code section. 523 return NativeJump::instruction_size; 524 } 525 526 #ifdef _LP64 527 static uint size_deopt_handler() { 528 // three 5 byte instructions 529 return 15; 530 } 531 #else 532 static uint size_deopt_handler() { 533 // NativeCall instruction size is the same as NativeJump. 534 // exception handler starts out as jump and can be patched to 535 // a call be deoptimization. (4932387) 536 // Note that this value is also credited (in output.cpp) to 537 // the size of the code section. 538 return 5 + NativeJump::instruction_size; // pushl(); jmp; 539 } 540 #endif 541 }; 542 543 %} // end source_hpp 544 545 source %{ 546 547 // Emit exception handler code. 548 // Stuff framesize into a register and call a VM stub routine. 549 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 550 551 // Note that the code buffer's insts_mark is always relative to insts. 552 // That's why we must use the macroassembler to generate a handler. 553 MacroAssembler _masm(&cbuf); 554 address base = __ start_a_stub(size_exception_handler()); 555 if (base == NULL) return 0; // CodeBuffer::expand failed 556 int offset = __ offset(); 557 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 558 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 559 __ end_a_stub(); 560 return offset; 561 } 562 563 // Emit deopt handler code. 564 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 565 566 // Note that the code buffer's insts_mark is always relative to insts. 567 // That's why we must use the macroassembler to generate a handler. 568 MacroAssembler _masm(&cbuf); 569 address base = __ start_a_stub(size_deopt_handler()); 570 if (base == NULL) return 0; // CodeBuffer::expand failed 571 int offset = __ offset(); 572 573 #ifdef _LP64 574 address the_pc = (address) __ pc(); 575 Label next; 576 // push a "the_pc" on the stack without destroying any registers 577 // as they all may be live. 578 579 // push address of "next" 580 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 581 __ bind(next); 582 // adjust it so it matches "the_pc" 583 __ subptr(Address(rsp, 0), __ offset() - offset); 584 #else 585 InternalAddress here(__ pc()); 586 __ pushptr(here.addr()); 587 #endif 588 589 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 590 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 591 __ end_a_stub(); 592 return offset; 593 } 594 595 596 //============================================================================= 597 598 // Float masks come from different places depending on platform. 599 #ifdef _LP64 600 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 601 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 602 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 603 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 604 #else 605 static address float_signmask() { return (address)float_signmask_pool; } 606 static address float_signflip() { return (address)float_signflip_pool; } 607 static address double_signmask() { return (address)double_signmask_pool; } 608 static address double_signflip() { return (address)double_signflip_pool; } 609 #endif 610 611 612 const bool Matcher::match_rule_supported(int opcode) { 613 if (!has_match_rule(opcode)) 614 return false; 615 616 switch (opcode) { 617 case Op_PopCountI: 618 case Op_PopCountL: 619 if (!UsePopCountInstruction) 620 return false; 621 break; 622 case Op_MulVI: 623 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 624 return false; 625 break; 626 case Op_AddReductionVL: 627 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 628 return false; 629 case Op_AddReductionVI: 630 if (UseSSE < 3) // requires at least SSE3 631 return false; 632 case Op_MulReductionVI: 633 if (UseSSE < 4) // requires at least SSE4 634 return false; 635 case Op_AddReductionVF: 636 case Op_AddReductionVD: 637 case Op_MulReductionVF: 638 case Op_MulReductionVD: 639 if (UseSSE < 1) // requires at least SSE 640 return false; 641 break; 642 case Op_CompareAndSwapL: 643 #ifdef _LP64 644 case Op_CompareAndSwapP: 645 #endif 646 if (!VM_Version::supports_cx8()) 647 return false; 648 break; 649 } 650 651 return true; // Per default match rules are supported. 652 } 653 654 // Max vector size in bytes. 0 if not supported. 655 const int Matcher::vector_width_in_bytes(BasicType bt) { 656 assert(is_java_primitive(bt), "only primitive type vectors"); 657 if (UseSSE < 2) return 0; 658 // SSE2 supports 128bit vectors for all types. 659 // AVX2 supports 256bit vectors for all types. 660 int size = (UseAVX > 1) ? 32 : 16; 661 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 662 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 663 size = 32; 664 // Use flag to limit vector size. 665 size = MIN2(size,(int)MaxVectorSize); 666 // Minimum 2 values in vector (or 4 for bytes). 667 switch (bt) { 668 case T_DOUBLE: 669 case T_LONG: 670 if (size < 16) return 0; 671 case T_FLOAT: 672 case T_INT: 673 if (size < 8) return 0; 674 case T_BOOLEAN: 675 case T_BYTE: 676 case T_CHAR: 677 case T_SHORT: 678 if (size < 4) return 0; 679 break; 680 default: 681 ShouldNotReachHere(); 682 } 683 return size; 684 } 685 686 // Limits on vector size (number of elements) loaded into vector. 687 const int Matcher::max_vector_size(const BasicType bt) { 688 return vector_width_in_bytes(bt)/type2aelembytes(bt); 689 } 690 const int Matcher::min_vector_size(const BasicType bt) { 691 int max_size = max_vector_size(bt); 692 // Min size which can be loaded into vector is 4 bytes. 693 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 694 return MIN2(size,max_size); 695 } 696 697 // Vector ideal reg corresponding to specidied size in bytes 698 const int Matcher::vector_ideal_reg(int size) { 699 assert(MaxVectorSize >= size, ""); 700 switch(size) { 701 case 4: return Op_VecS; 702 case 8: return Op_VecD; 703 case 16: return Op_VecX; 704 case 32: return Op_VecY; 705 } 706 ShouldNotReachHere(); 707 return 0; 708 } 709 710 // Only lowest bits of xmm reg are used for vector shift count. 711 const int Matcher::vector_shift_count_ideal_reg(int size) { 712 return Op_VecS; 713 } 714 715 // x86 supports misaligned vectors store/load. 716 const bool Matcher::misaligned_vectors_ok() { 717 return !AlignVector; // can be changed by flag 718 } 719 720 // x86 AES instructions are compatible with SunJCE expanded 721 // keys, hence we do not need to pass the original key to stubs 722 const bool Matcher::pass_original_key_for_aes() { 723 return false; 724 } 725 726 // Helper methods for MachSpillCopyNode::implementation(). 727 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 728 int src_hi, int dst_hi, uint ireg, outputStream* st) { 729 // In 64-bit VM size calculation is very complex. Emitting instructions 730 // into scratch buffer is used to get size in 64-bit VM. 731 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 732 assert(ireg == Op_VecS || // 32bit vector 733 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 734 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 735 "no non-adjacent vector moves" ); 736 if (cbuf) { 737 MacroAssembler _masm(cbuf); 738 int offset = __ offset(); 739 switch (ireg) { 740 case Op_VecS: // copy whole register 741 case Op_VecD: 742 case Op_VecX: 743 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 744 break; 745 case Op_VecY: 746 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 747 break; 748 default: 749 ShouldNotReachHere(); 750 } 751 int size = __ offset() - offset; 752 #ifdef ASSERT 753 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 754 assert(!do_size || size == 4, "incorrect size calculattion"); 755 #endif 756 return size; 757 #ifndef PRODUCT 758 } else if (!do_size) { 759 switch (ireg) { 760 case Op_VecS: 761 case Op_VecD: 762 case Op_VecX: 763 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 764 break; 765 case Op_VecY: 766 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 767 break; 768 default: 769 ShouldNotReachHere(); 770 } 771 #endif 772 } 773 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 774 return 4; 775 } 776 777 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 778 int stack_offset, int reg, uint ireg, outputStream* st) { 779 // In 64-bit VM size calculation is very complex. Emitting instructions 780 // into scratch buffer is used to get size in 64-bit VM. 781 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 782 if (cbuf) { 783 MacroAssembler _masm(cbuf); 784 int offset = __ offset(); 785 if (is_load) { 786 switch (ireg) { 787 case Op_VecS: 788 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 789 break; 790 case Op_VecD: 791 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 792 break; 793 case Op_VecX: 794 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 795 break; 796 case Op_VecY: 797 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 798 break; 799 default: 800 ShouldNotReachHere(); 801 } 802 } else { // store 803 switch (ireg) { 804 case Op_VecS: 805 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 806 break; 807 case Op_VecD: 808 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 809 break; 810 case Op_VecX: 811 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 812 break; 813 case Op_VecY: 814 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 815 break; 816 default: 817 ShouldNotReachHere(); 818 } 819 } 820 int size = __ offset() - offset; 821 #ifdef ASSERT 822 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 823 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 824 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 825 #endif 826 return size; 827 #ifndef PRODUCT 828 } else if (!do_size) { 829 if (is_load) { 830 switch (ireg) { 831 case Op_VecS: 832 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 833 break; 834 case Op_VecD: 835 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 836 break; 837 case Op_VecX: 838 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 839 break; 840 case Op_VecY: 841 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 842 break; 843 default: 844 ShouldNotReachHere(); 845 } 846 } else { // store 847 switch (ireg) { 848 case Op_VecS: 849 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 850 break; 851 case Op_VecD: 852 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 853 break; 854 case Op_VecX: 855 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 856 break; 857 case Op_VecY: 858 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 859 break; 860 default: 861 ShouldNotReachHere(); 862 } 863 } 864 #endif 865 } 866 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 867 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 868 return 5+offset_size; 869 } 870 871 static inline jfloat replicate4_imm(int con, int width) { 872 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 873 assert(width == 1 || width == 2, "only byte or short types here"); 874 int bit_width = width * 8; 875 jint val = con; 876 val &= (1 << bit_width) - 1; // mask off sign bits 877 while(bit_width < 32) { 878 val |= (val << bit_width); 879 bit_width <<= 1; 880 } 881 jfloat fval = *((jfloat*) &val); // coerce to float type 882 return fval; 883 } 884 885 static inline jdouble replicate8_imm(int con, int width) { 886 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 887 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 888 int bit_width = width * 8; 889 jlong val = con; 890 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 891 while(bit_width < 64) { 892 val |= (val << bit_width); 893 bit_width <<= 1; 894 } 895 jdouble dval = *((jdouble*) &val); // coerce to double type 896 return dval; 897 } 898 899 #ifndef PRODUCT 900 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 901 st->print("nop \t# %d bytes pad for loops and calls", _count); 902 } 903 #endif 904 905 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 906 MacroAssembler _masm(&cbuf); 907 __ nop(_count); 908 } 909 910 uint MachNopNode::size(PhaseRegAlloc*) const { 911 return _count; 912 } 913 914 #ifndef PRODUCT 915 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 916 st->print("# breakpoint"); 917 } 918 #endif 919 920 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 921 MacroAssembler _masm(&cbuf); 922 __ int3(); 923 } 924 925 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 926 return MachNode::size(ra_); 927 } 928 929 %} 930 931 encode %{ 932 933 enc_class call_epilog %{ 934 if (VerifyStackAtCalls) { 935 // Check that stack depth is unchanged: find majik cookie on stack 936 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 937 MacroAssembler _masm(&cbuf); 938 Label L; 939 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 940 __ jccb(Assembler::equal, L); 941 // Die if stack mismatch 942 __ int3(); 943 __ bind(L); 944 } 945 %} 946 947 %} 948 949 950 //----------OPERANDS----------------------------------------------------------- 951 // Operand definitions must precede instruction definitions for correct parsing 952 // in the ADLC because operands constitute user defined types which are used in 953 // instruction definitions. 954 955 // Vectors 956 operand vecS() %{ 957 constraint(ALLOC_IN_RC(vectors_reg)); 958 match(VecS); 959 960 format %{ %} 961 interface(REG_INTER); 962 %} 963 964 operand vecD() %{ 965 constraint(ALLOC_IN_RC(vectord_reg)); 966 match(VecD); 967 968 format %{ %} 969 interface(REG_INTER); 970 %} 971 972 operand vecX() %{ 973 constraint(ALLOC_IN_RC(vectorx_reg)); 974 match(VecX); 975 976 format %{ %} 977 interface(REG_INTER); 978 %} 979 980 operand vecY() %{ 981 constraint(ALLOC_IN_RC(vectory_reg)); 982 match(VecY); 983 984 format %{ %} 985 interface(REG_INTER); 986 %} 987 988 989 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 990 991 // ============================================================================ 992 993 instruct ShouldNotReachHere() %{ 994 match(Halt); 995 format %{ "int3\t# ShouldNotReachHere" %} 996 ins_encode %{ 997 __ int3(); 998 %} 999 ins_pipe(pipe_slow); 1000 %} 1001 1002 // ============================================================================ 1003 1004 instruct addF_reg(regF dst, regF src) %{ 1005 predicate((UseSSE>=1) && (UseAVX == 0)); 1006 match(Set dst (AddF dst src)); 1007 1008 format %{ "addss $dst, $src" %} 1009 ins_cost(150); 1010 ins_encode %{ 1011 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1012 %} 1013 ins_pipe(pipe_slow); 1014 %} 1015 1016 instruct addF_mem(regF dst, memory src) %{ 1017 predicate((UseSSE>=1) && (UseAVX == 0)); 1018 match(Set dst (AddF dst (LoadF src))); 1019 1020 format %{ "addss $dst, $src" %} 1021 ins_cost(150); 1022 ins_encode %{ 1023 __ addss($dst$$XMMRegister, $src$$Address); 1024 %} 1025 ins_pipe(pipe_slow); 1026 %} 1027 1028 instruct addF_imm(regF dst, immF con) %{ 1029 predicate((UseSSE>=1) && (UseAVX == 0)); 1030 match(Set dst (AddF dst con)); 1031 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1032 ins_cost(150); 1033 ins_encode %{ 1034 __ addss($dst$$XMMRegister, $constantaddress($con)); 1035 %} 1036 ins_pipe(pipe_slow); 1037 %} 1038 1039 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1040 predicate(UseAVX > 0); 1041 match(Set dst (AddF src1 src2)); 1042 1043 format %{ "vaddss $dst, $src1, $src2" %} 1044 ins_cost(150); 1045 ins_encode %{ 1046 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1047 %} 1048 ins_pipe(pipe_slow); 1049 %} 1050 1051 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1052 predicate(UseAVX > 0); 1053 match(Set dst (AddF src1 (LoadF src2))); 1054 1055 format %{ "vaddss $dst, $src1, $src2" %} 1056 ins_cost(150); 1057 ins_encode %{ 1058 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1059 %} 1060 ins_pipe(pipe_slow); 1061 %} 1062 1063 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1064 predicate(UseAVX > 0); 1065 match(Set dst (AddF src con)); 1066 1067 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1068 ins_cost(150); 1069 ins_encode %{ 1070 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1071 %} 1072 ins_pipe(pipe_slow); 1073 %} 1074 1075 instruct addD_reg(regD dst, regD src) %{ 1076 predicate((UseSSE>=2) && (UseAVX == 0)); 1077 match(Set dst (AddD dst src)); 1078 1079 format %{ "addsd $dst, $src" %} 1080 ins_cost(150); 1081 ins_encode %{ 1082 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1083 %} 1084 ins_pipe(pipe_slow); 1085 %} 1086 1087 instruct addD_mem(regD dst, memory src) %{ 1088 predicate((UseSSE>=2) && (UseAVX == 0)); 1089 match(Set dst (AddD dst (LoadD src))); 1090 1091 format %{ "addsd $dst, $src" %} 1092 ins_cost(150); 1093 ins_encode %{ 1094 __ addsd($dst$$XMMRegister, $src$$Address); 1095 %} 1096 ins_pipe(pipe_slow); 1097 %} 1098 1099 instruct addD_imm(regD dst, immD con) %{ 1100 predicate((UseSSE>=2) && (UseAVX == 0)); 1101 match(Set dst (AddD dst con)); 1102 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1103 ins_cost(150); 1104 ins_encode %{ 1105 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1106 %} 1107 ins_pipe(pipe_slow); 1108 %} 1109 1110 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1111 predicate(UseAVX > 0); 1112 match(Set dst (AddD src1 src2)); 1113 1114 format %{ "vaddsd $dst, $src1, $src2" %} 1115 ins_cost(150); 1116 ins_encode %{ 1117 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1118 %} 1119 ins_pipe(pipe_slow); 1120 %} 1121 1122 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1123 predicate(UseAVX > 0); 1124 match(Set dst (AddD src1 (LoadD src2))); 1125 1126 format %{ "vaddsd $dst, $src1, $src2" %} 1127 ins_cost(150); 1128 ins_encode %{ 1129 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1130 %} 1131 ins_pipe(pipe_slow); 1132 %} 1133 1134 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1135 predicate(UseAVX > 0); 1136 match(Set dst (AddD src con)); 1137 1138 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1139 ins_cost(150); 1140 ins_encode %{ 1141 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1142 %} 1143 ins_pipe(pipe_slow); 1144 %} 1145 1146 instruct subF_reg(regF dst, regF src) %{ 1147 predicate((UseSSE>=1) && (UseAVX == 0)); 1148 match(Set dst (SubF dst src)); 1149 1150 format %{ "subss $dst, $src" %} 1151 ins_cost(150); 1152 ins_encode %{ 1153 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1154 %} 1155 ins_pipe(pipe_slow); 1156 %} 1157 1158 instruct subF_mem(regF dst, memory src) %{ 1159 predicate((UseSSE>=1) && (UseAVX == 0)); 1160 match(Set dst (SubF dst (LoadF src))); 1161 1162 format %{ "subss $dst, $src" %} 1163 ins_cost(150); 1164 ins_encode %{ 1165 __ subss($dst$$XMMRegister, $src$$Address); 1166 %} 1167 ins_pipe(pipe_slow); 1168 %} 1169 1170 instruct subF_imm(regF dst, immF con) %{ 1171 predicate((UseSSE>=1) && (UseAVX == 0)); 1172 match(Set dst (SubF dst con)); 1173 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1174 ins_cost(150); 1175 ins_encode %{ 1176 __ subss($dst$$XMMRegister, $constantaddress($con)); 1177 %} 1178 ins_pipe(pipe_slow); 1179 %} 1180 1181 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 1182 predicate(UseAVX > 0); 1183 match(Set dst (SubF src1 src2)); 1184 1185 format %{ "vsubss $dst, $src1, $src2" %} 1186 ins_cost(150); 1187 ins_encode %{ 1188 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1189 %} 1190 ins_pipe(pipe_slow); 1191 %} 1192 1193 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 1194 predicate(UseAVX > 0); 1195 match(Set dst (SubF src1 (LoadF src2))); 1196 1197 format %{ "vsubss $dst, $src1, $src2" %} 1198 ins_cost(150); 1199 ins_encode %{ 1200 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1201 %} 1202 ins_pipe(pipe_slow); 1203 %} 1204 1205 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 1206 predicate(UseAVX > 0); 1207 match(Set dst (SubF src con)); 1208 1209 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1210 ins_cost(150); 1211 ins_encode %{ 1212 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1213 %} 1214 ins_pipe(pipe_slow); 1215 %} 1216 1217 instruct subD_reg(regD dst, regD src) %{ 1218 predicate((UseSSE>=2) && (UseAVX == 0)); 1219 match(Set dst (SubD dst src)); 1220 1221 format %{ "subsd $dst, $src" %} 1222 ins_cost(150); 1223 ins_encode %{ 1224 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 1225 %} 1226 ins_pipe(pipe_slow); 1227 %} 1228 1229 instruct subD_mem(regD dst, memory src) %{ 1230 predicate((UseSSE>=2) && (UseAVX == 0)); 1231 match(Set dst (SubD dst (LoadD src))); 1232 1233 format %{ "subsd $dst, $src" %} 1234 ins_cost(150); 1235 ins_encode %{ 1236 __ subsd($dst$$XMMRegister, $src$$Address); 1237 %} 1238 ins_pipe(pipe_slow); 1239 %} 1240 1241 instruct subD_imm(regD dst, immD con) %{ 1242 predicate((UseSSE>=2) && (UseAVX == 0)); 1243 match(Set dst (SubD dst con)); 1244 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1245 ins_cost(150); 1246 ins_encode %{ 1247 __ subsd($dst$$XMMRegister, $constantaddress($con)); 1248 %} 1249 ins_pipe(pipe_slow); 1250 %} 1251 1252 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 1253 predicate(UseAVX > 0); 1254 match(Set dst (SubD src1 src2)); 1255 1256 format %{ "vsubsd $dst, $src1, $src2" %} 1257 ins_cost(150); 1258 ins_encode %{ 1259 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1260 %} 1261 ins_pipe(pipe_slow); 1262 %} 1263 1264 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 1265 predicate(UseAVX > 0); 1266 match(Set dst (SubD src1 (LoadD src2))); 1267 1268 format %{ "vsubsd $dst, $src1, $src2" %} 1269 ins_cost(150); 1270 ins_encode %{ 1271 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1272 %} 1273 ins_pipe(pipe_slow); 1274 %} 1275 1276 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 1277 predicate(UseAVX > 0); 1278 match(Set dst (SubD src con)); 1279 1280 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1281 ins_cost(150); 1282 ins_encode %{ 1283 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1284 %} 1285 ins_pipe(pipe_slow); 1286 %} 1287 1288 instruct mulF_reg(regF dst, regF src) %{ 1289 predicate((UseSSE>=1) && (UseAVX == 0)); 1290 match(Set dst (MulF dst src)); 1291 1292 format %{ "mulss $dst, $src" %} 1293 ins_cost(150); 1294 ins_encode %{ 1295 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 1296 %} 1297 ins_pipe(pipe_slow); 1298 %} 1299 1300 instruct mulF_mem(regF dst, memory src) %{ 1301 predicate((UseSSE>=1) && (UseAVX == 0)); 1302 match(Set dst (MulF dst (LoadF src))); 1303 1304 format %{ "mulss $dst, $src" %} 1305 ins_cost(150); 1306 ins_encode %{ 1307 __ mulss($dst$$XMMRegister, $src$$Address); 1308 %} 1309 ins_pipe(pipe_slow); 1310 %} 1311 1312 instruct mulF_imm(regF dst, immF con) %{ 1313 predicate((UseSSE>=1) && (UseAVX == 0)); 1314 match(Set dst (MulF dst con)); 1315 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1316 ins_cost(150); 1317 ins_encode %{ 1318 __ mulss($dst$$XMMRegister, $constantaddress($con)); 1319 %} 1320 ins_pipe(pipe_slow); 1321 %} 1322 1323 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 1324 predicate(UseAVX > 0); 1325 match(Set dst (MulF src1 src2)); 1326 1327 format %{ "vmulss $dst, $src1, $src2" %} 1328 ins_cost(150); 1329 ins_encode %{ 1330 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1331 %} 1332 ins_pipe(pipe_slow); 1333 %} 1334 1335 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 1336 predicate(UseAVX > 0); 1337 match(Set dst (MulF src1 (LoadF src2))); 1338 1339 format %{ "vmulss $dst, $src1, $src2" %} 1340 ins_cost(150); 1341 ins_encode %{ 1342 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1343 %} 1344 ins_pipe(pipe_slow); 1345 %} 1346 1347 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 1348 predicate(UseAVX > 0); 1349 match(Set dst (MulF src con)); 1350 1351 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1352 ins_cost(150); 1353 ins_encode %{ 1354 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1355 %} 1356 ins_pipe(pipe_slow); 1357 %} 1358 1359 instruct mulD_reg(regD dst, regD src) %{ 1360 predicate((UseSSE>=2) && (UseAVX == 0)); 1361 match(Set dst (MulD dst src)); 1362 1363 format %{ "mulsd $dst, $src" %} 1364 ins_cost(150); 1365 ins_encode %{ 1366 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 1367 %} 1368 ins_pipe(pipe_slow); 1369 %} 1370 1371 instruct mulD_mem(regD dst, memory src) %{ 1372 predicate((UseSSE>=2) && (UseAVX == 0)); 1373 match(Set dst (MulD dst (LoadD src))); 1374 1375 format %{ "mulsd $dst, $src" %} 1376 ins_cost(150); 1377 ins_encode %{ 1378 __ mulsd($dst$$XMMRegister, $src$$Address); 1379 %} 1380 ins_pipe(pipe_slow); 1381 %} 1382 1383 instruct mulD_imm(regD dst, immD con) %{ 1384 predicate((UseSSE>=2) && (UseAVX == 0)); 1385 match(Set dst (MulD dst con)); 1386 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1387 ins_cost(150); 1388 ins_encode %{ 1389 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 1390 %} 1391 ins_pipe(pipe_slow); 1392 %} 1393 1394 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 1395 predicate(UseAVX > 0); 1396 match(Set dst (MulD src1 src2)); 1397 1398 format %{ "vmulsd $dst, $src1, $src2" %} 1399 ins_cost(150); 1400 ins_encode %{ 1401 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1402 %} 1403 ins_pipe(pipe_slow); 1404 %} 1405 1406 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 1407 predicate(UseAVX > 0); 1408 match(Set dst (MulD src1 (LoadD src2))); 1409 1410 format %{ "vmulsd $dst, $src1, $src2" %} 1411 ins_cost(150); 1412 ins_encode %{ 1413 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1414 %} 1415 ins_pipe(pipe_slow); 1416 %} 1417 1418 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 1419 predicate(UseAVX > 0); 1420 match(Set dst (MulD src con)); 1421 1422 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1423 ins_cost(150); 1424 ins_encode %{ 1425 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1426 %} 1427 ins_pipe(pipe_slow); 1428 %} 1429 1430 instruct divF_reg(regF dst, regF src) %{ 1431 predicate((UseSSE>=1) && (UseAVX == 0)); 1432 match(Set dst (DivF dst src)); 1433 1434 format %{ "divss $dst, $src" %} 1435 ins_cost(150); 1436 ins_encode %{ 1437 __ divss($dst$$XMMRegister, $src$$XMMRegister); 1438 %} 1439 ins_pipe(pipe_slow); 1440 %} 1441 1442 instruct divF_mem(regF dst, memory src) %{ 1443 predicate((UseSSE>=1) && (UseAVX == 0)); 1444 match(Set dst (DivF dst (LoadF src))); 1445 1446 format %{ "divss $dst, $src" %} 1447 ins_cost(150); 1448 ins_encode %{ 1449 __ divss($dst$$XMMRegister, $src$$Address); 1450 %} 1451 ins_pipe(pipe_slow); 1452 %} 1453 1454 instruct divF_imm(regF dst, immF con) %{ 1455 predicate((UseSSE>=1) && (UseAVX == 0)); 1456 match(Set dst (DivF dst con)); 1457 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1458 ins_cost(150); 1459 ins_encode %{ 1460 __ divss($dst$$XMMRegister, $constantaddress($con)); 1461 %} 1462 ins_pipe(pipe_slow); 1463 %} 1464 1465 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 1466 predicate(UseAVX > 0); 1467 match(Set dst (DivF src1 src2)); 1468 1469 format %{ "vdivss $dst, $src1, $src2" %} 1470 ins_cost(150); 1471 ins_encode %{ 1472 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1473 %} 1474 ins_pipe(pipe_slow); 1475 %} 1476 1477 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 1478 predicate(UseAVX > 0); 1479 match(Set dst (DivF src1 (LoadF src2))); 1480 1481 format %{ "vdivss $dst, $src1, $src2" %} 1482 ins_cost(150); 1483 ins_encode %{ 1484 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1485 %} 1486 ins_pipe(pipe_slow); 1487 %} 1488 1489 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 1490 predicate(UseAVX > 0); 1491 match(Set dst (DivF src con)); 1492 1493 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1494 ins_cost(150); 1495 ins_encode %{ 1496 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1497 %} 1498 ins_pipe(pipe_slow); 1499 %} 1500 1501 instruct divD_reg(regD dst, regD src) %{ 1502 predicate((UseSSE>=2) && (UseAVX == 0)); 1503 match(Set dst (DivD dst src)); 1504 1505 format %{ "divsd $dst, $src" %} 1506 ins_cost(150); 1507 ins_encode %{ 1508 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 1509 %} 1510 ins_pipe(pipe_slow); 1511 %} 1512 1513 instruct divD_mem(regD dst, memory src) %{ 1514 predicate((UseSSE>=2) && (UseAVX == 0)); 1515 match(Set dst (DivD dst (LoadD src))); 1516 1517 format %{ "divsd $dst, $src" %} 1518 ins_cost(150); 1519 ins_encode %{ 1520 __ divsd($dst$$XMMRegister, $src$$Address); 1521 %} 1522 ins_pipe(pipe_slow); 1523 %} 1524 1525 instruct divD_imm(regD dst, immD con) %{ 1526 predicate((UseSSE>=2) && (UseAVX == 0)); 1527 match(Set dst (DivD dst con)); 1528 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1529 ins_cost(150); 1530 ins_encode %{ 1531 __ divsd($dst$$XMMRegister, $constantaddress($con)); 1532 %} 1533 ins_pipe(pipe_slow); 1534 %} 1535 1536 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 1537 predicate(UseAVX > 0); 1538 match(Set dst (DivD src1 src2)); 1539 1540 format %{ "vdivsd $dst, $src1, $src2" %} 1541 ins_cost(150); 1542 ins_encode %{ 1543 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1544 %} 1545 ins_pipe(pipe_slow); 1546 %} 1547 1548 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 1549 predicate(UseAVX > 0); 1550 match(Set dst (DivD src1 (LoadD src2))); 1551 1552 format %{ "vdivsd $dst, $src1, $src2" %} 1553 ins_cost(150); 1554 ins_encode %{ 1555 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1556 %} 1557 ins_pipe(pipe_slow); 1558 %} 1559 1560 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 1561 predicate(UseAVX > 0); 1562 match(Set dst (DivD src con)); 1563 1564 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1565 ins_cost(150); 1566 ins_encode %{ 1567 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1568 %} 1569 ins_pipe(pipe_slow); 1570 %} 1571 1572 instruct absF_reg(regF dst) %{ 1573 predicate((UseSSE>=1) && (UseAVX == 0)); 1574 match(Set dst (AbsF dst)); 1575 ins_cost(150); 1576 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1577 ins_encode %{ 1578 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1579 %} 1580 ins_pipe(pipe_slow); 1581 %} 1582 1583 instruct absF_reg_reg(regF dst, regF src) %{ 1584 predicate(UseAVX > 0); 1585 match(Set dst (AbsF src)); 1586 ins_cost(150); 1587 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 1588 ins_encode %{ 1589 bool vector256 = false; 1590 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1591 ExternalAddress(float_signmask()), vector256); 1592 %} 1593 ins_pipe(pipe_slow); 1594 %} 1595 1596 instruct absD_reg(regD dst) %{ 1597 predicate((UseSSE>=2) && (UseAVX == 0)); 1598 match(Set dst (AbsD dst)); 1599 ins_cost(150); 1600 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1601 "# abs double by sign masking" %} 1602 ins_encode %{ 1603 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1604 %} 1605 ins_pipe(pipe_slow); 1606 %} 1607 1608 instruct absD_reg_reg(regD dst, regD src) %{ 1609 predicate(UseAVX > 0); 1610 match(Set dst (AbsD src)); 1611 ins_cost(150); 1612 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1613 "# abs double by sign masking" %} 1614 ins_encode %{ 1615 bool vector256 = false; 1616 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1617 ExternalAddress(double_signmask()), vector256); 1618 %} 1619 ins_pipe(pipe_slow); 1620 %} 1621 1622 instruct negF_reg(regF dst) %{ 1623 predicate((UseSSE>=1) && (UseAVX == 0)); 1624 match(Set dst (NegF dst)); 1625 ins_cost(150); 1626 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1627 ins_encode %{ 1628 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1629 %} 1630 ins_pipe(pipe_slow); 1631 %} 1632 1633 instruct negF_reg_reg(regF dst, regF src) %{ 1634 predicate(UseAVX > 0); 1635 match(Set dst (NegF src)); 1636 ins_cost(150); 1637 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1638 ins_encode %{ 1639 bool vector256 = false; 1640 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1641 ExternalAddress(float_signflip()), vector256); 1642 %} 1643 ins_pipe(pipe_slow); 1644 %} 1645 1646 instruct negD_reg(regD dst) %{ 1647 predicate((UseSSE>=2) && (UseAVX == 0)); 1648 match(Set dst (NegD dst)); 1649 ins_cost(150); 1650 format %{ "xorpd $dst, [0x8000000000000000]\t" 1651 "# neg double by sign flipping" %} 1652 ins_encode %{ 1653 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1654 %} 1655 ins_pipe(pipe_slow); 1656 %} 1657 1658 instruct negD_reg_reg(regD dst, regD src) %{ 1659 predicate(UseAVX > 0); 1660 match(Set dst (NegD src)); 1661 ins_cost(150); 1662 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1663 "# neg double by sign flipping" %} 1664 ins_encode %{ 1665 bool vector256 = false; 1666 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1667 ExternalAddress(double_signflip()), vector256); 1668 %} 1669 ins_pipe(pipe_slow); 1670 %} 1671 1672 instruct sqrtF_reg(regF dst, regF src) %{ 1673 predicate(UseSSE>=1); 1674 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1675 1676 format %{ "sqrtss $dst, $src" %} 1677 ins_cost(150); 1678 ins_encode %{ 1679 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1680 %} 1681 ins_pipe(pipe_slow); 1682 %} 1683 1684 instruct sqrtF_mem(regF dst, memory src) %{ 1685 predicate(UseSSE>=1); 1686 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 1687 1688 format %{ "sqrtss $dst, $src" %} 1689 ins_cost(150); 1690 ins_encode %{ 1691 __ sqrtss($dst$$XMMRegister, $src$$Address); 1692 %} 1693 ins_pipe(pipe_slow); 1694 %} 1695 1696 instruct sqrtF_imm(regF dst, immF con) %{ 1697 predicate(UseSSE>=1); 1698 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 1699 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1700 ins_cost(150); 1701 ins_encode %{ 1702 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 1703 %} 1704 ins_pipe(pipe_slow); 1705 %} 1706 1707 instruct sqrtD_reg(regD dst, regD src) %{ 1708 predicate(UseSSE>=2); 1709 match(Set dst (SqrtD src)); 1710 1711 format %{ "sqrtsd $dst, $src" %} 1712 ins_cost(150); 1713 ins_encode %{ 1714 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 1715 %} 1716 ins_pipe(pipe_slow); 1717 %} 1718 1719 instruct sqrtD_mem(regD dst, memory src) %{ 1720 predicate(UseSSE>=2); 1721 match(Set dst (SqrtD (LoadD src))); 1722 1723 format %{ "sqrtsd $dst, $src" %} 1724 ins_cost(150); 1725 ins_encode %{ 1726 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1727 %} 1728 ins_pipe(pipe_slow); 1729 %} 1730 1731 instruct sqrtD_imm(regD dst, immD con) %{ 1732 predicate(UseSSE>=2); 1733 match(Set dst (SqrtD con)); 1734 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1735 ins_cost(150); 1736 ins_encode %{ 1737 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1738 %} 1739 ins_pipe(pipe_slow); 1740 %} 1741 1742 1743 // ====================VECTOR INSTRUCTIONS===================================== 1744 1745 // Load vectors (4 bytes long) 1746 instruct loadV4(vecS dst, memory mem) %{ 1747 predicate(n->as_LoadVector()->memory_size() == 4); 1748 match(Set dst (LoadVector mem)); 1749 ins_cost(125); 1750 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1751 ins_encode %{ 1752 __ movdl($dst$$XMMRegister, $mem$$Address); 1753 %} 1754 ins_pipe( pipe_slow ); 1755 %} 1756 1757 // Load vectors (8 bytes long) 1758 instruct loadV8(vecD dst, memory mem) %{ 1759 predicate(n->as_LoadVector()->memory_size() == 8); 1760 match(Set dst (LoadVector mem)); 1761 ins_cost(125); 1762 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 1763 ins_encode %{ 1764 __ movq($dst$$XMMRegister, $mem$$Address); 1765 %} 1766 ins_pipe( pipe_slow ); 1767 %} 1768 1769 // Load vectors (16 bytes long) 1770 instruct loadV16(vecX dst, memory mem) %{ 1771 predicate(n->as_LoadVector()->memory_size() == 16); 1772 match(Set dst (LoadVector mem)); 1773 ins_cost(125); 1774 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 1775 ins_encode %{ 1776 __ movdqu($dst$$XMMRegister, $mem$$Address); 1777 %} 1778 ins_pipe( pipe_slow ); 1779 %} 1780 1781 // Load vectors (32 bytes long) 1782 instruct loadV32(vecY dst, memory mem) %{ 1783 predicate(n->as_LoadVector()->memory_size() == 32); 1784 match(Set dst (LoadVector mem)); 1785 ins_cost(125); 1786 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 1787 ins_encode %{ 1788 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 1789 %} 1790 ins_pipe( pipe_slow ); 1791 %} 1792 1793 // Store vectors 1794 instruct storeV4(memory mem, vecS src) %{ 1795 predicate(n->as_StoreVector()->memory_size() == 4); 1796 match(Set mem (StoreVector mem src)); 1797 ins_cost(145); 1798 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 1799 ins_encode %{ 1800 __ movdl($mem$$Address, $src$$XMMRegister); 1801 %} 1802 ins_pipe( pipe_slow ); 1803 %} 1804 1805 instruct storeV8(memory mem, vecD src) %{ 1806 predicate(n->as_StoreVector()->memory_size() == 8); 1807 match(Set mem (StoreVector mem src)); 1808 ins_cost(145); 1809 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1810 ins_encode %{ 1811 __ movq($mem$$Address, $src$$XMMRegister); 1812 %} 1813 ins_pipe( pipe_slow ); 1814 %} 1815 1816 instruct storeV16(memory mem, vecX src) %{ 1817 predicate(n->as_StoreVector()->memory_size() == 16); 1818 match(Set mem (StoreVector mem src)); 1819 ins_cost(145); 1820 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 1821 ins_encode %{ 1822 __ movdqu($mem$$Address, $src$$XMMRegister); 1823 %} 1824 ins_pipe( pipe_slow ); 1825 %} 1826 1827 instruct storeV32(memory mem, vecY src) %{ 1828 predicate(n->as_StoreVector()->memory_size() == 32); 1829 match(Set mem (StoreVector mem src)); 1830 ins_cost(145); 1831 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1832 ins_encode %{ 1833 __ vmovdqu($mem$$Address, $src$$XMMRegister); 1834 %} 1835 ins_pipe( pipe_slow ); 1836 %} 1837 1838 // Replicate byte scalar to be vector 1839 instruct Repl4B(vecS dst, rRegI src) %{ 1840 predicate(n->as_Vector()->length() == 4); 1841 match(Set dst (ReplicateB src)); 1842 format %{ "movd $dst,$src\n\t" 1843 "punpcklbw $dst,$dst\n\t" 1844 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 1845 ins_encode %{ 1846 __ movdl($dst$$XMMRegister, $src$$Register); 1847 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1848 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1849 %} 1850 ins_pipe( pipe_slow ); 1851 %} 1852 1853 instruct Repl8B(vecD dst, rRegI src) %{ 1854 predicate(n->as_Vector()->length() == 8); 1855 match(Set dst (ReplicateB src)); 1856 format %{ "movd $dst,$src\n\t" 1857 "punpcklbw $dst,$dst\n\t" 1858 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1859 ins_encode %{ 1860 __ movdl($dst$$XMMRegister, $src$$Register); 1861 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1862 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1863 %} 1864 ins_pipe( pipe_slow ); 1865 %} 1866 1867 instruct Repl16B(vecX dst, rRegI src) %{ 1868 predicate(n->as_Vector()->length() == 16); 1869 match(Set dst (ReplicateB src)); 1870 format %{ "movd $dst,$src\n\t" 1871 "punpcklbw $dst,$dst\n\t" 1872 "pshuflw $dst,$dst,0x00\n\t" 1873 "punpcklqdq $dst,$dst\t! replicate16B" %} 1874 ins_encode %{ 1875 __ movdl($dst$$XMMRegister, $src$$Register); 1876 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1877 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1878 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1879 %} 1880 ins_pipe( pipe_slow ); 1881 %} 1882 1883 instruct Repl32B(vecY dst, rRegI src) %{ 1884 predicate(n->as_Vector()->length() == 32); 1885 match(Set dst (ReplicateB src)); 1886 format %{ "movd $dst,$src\n\t" 1887 "punpcklbw $dst,$dst\n\t" 1888 "pshuflw $dst,$dst,0x00\n\t" 1889 "punpcklqdq $dst,$dst\n\t" 1890 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 1891 ins_encode %{ 1892 __ movdl($dst$$XMMRegister, $src$$Register); 1893 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1894 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1895 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1896 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1897 %} 1898 ins_pipe( pipe_slow ); 1899 %} 1900 1901 // Replicate byte scalar immediate to be vector by loading from const table. 1902 instruct Repl4B_imm(vecS dst, immI con) %{ 1903 predicate(n->as_Vector()->length() == 4); 1904 match(Set dst (ReplicateB con)); 1905 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1906 ins_encode %{ 1907 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1908 %} 1909 ins_pipe( pipe_slow ); 1910 %} 1911 1912 instruct Repl8B_imm(vecD dst, immI con) %{ 1913 predicate(n->as_Vector()->length() == 8); 1914 match(Set dst (ReplicateB con)); 1915 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 1916 ins_encode %{ 1917 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1918 %} 1919 ins_pipe( pipe_slow ); 1920 %} 1921 1922 instruct Repl16B_imm(vecX dst, immI con) %{ 1923 predicate(n->as_Vector()->length() == 16); 1924 match(Set dst (ReplicateB con)); 1925 format %{ "movq $dst,[$constantaddress]\n\t" 1926 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 1927 ins_encode %{ 1928 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1929 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1930 %} 1931 ins_pipe( pipe_slow ); 1932 %} 1933 1934 instruct Repl32B_imm(vecY dst, immI con) %{ 1935 predicate(n->as_Vector()->length() == 32); 1936 match(Set dst (ReplicateB con)); 1937 format %{ "movq $dst,[$constantaddress]\n\t" 1938 "punpcklqdq $dst,$dst\n\t" 1939 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} 1940 ins_encode %{ 1941 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1942 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1943 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1944 %} 1945 ins_pipe( pipe_slow ); 1946 %} 1947 1948 // Replicate byte scalar zero to be vector 1949 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 1950 predicate(n->as_Vector()->length() == 4); 1951 match(Set dst (ReplicateB zero)); 1952 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 1953 ins_encode %{ 1954 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1955 %} 1956 ins_pipe( fpu_reg_reg ); 1957 %} 1958 1959 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 1960 predicate(n->as_Vector()->length() == 8); 1961 match(Set dst (ReplicateB zero)); 1962 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 1963 ins_encode %{ 1964 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1965 %} 1966 ins_pipe( fpu_reg_reg ); 1967 %} 1968 1969 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1970 predicate(n->as_Vector()->length() == 16); 1971 match(Set dst (ReplicateB zero)); 1972 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 1973 ins_encode %{ 1974 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1975 %} 1976 ins_pipe( fpu_reg_reg ); 1977 %} 1978 1979 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1980 predicate(n->as_Vector()->length() == 32); 1981 match(Set dst (ReplicateB zero)); 1982 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 1983 ins_encode %{ 1984 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 1985 bool vector256 = true; 1986 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1987 %} 1988 ins_pipe( fpu_reg_reg ); 1989 %} 1990 1991 // Replicate char/short (2 byte) scalar to be vector 1992 instruct Repl2S(vecS dst, rRegI src) %{ 1993 predicate(n->as_Vector()->length() == 2); 1994 match(Set dst (ReplicateS src)); 1995 format %{ "movd $dst,$src\n\t" 1996 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1997 ins_encode %{ 1998 __ movdl($dst$$XMMRegister, $src$$Register); 1999 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2000 %} 2001 ins_pipe( fpu_reg_reg ); 2002 %} 2003 2004 instruct Repl4S(vecD dst, rRegI src) %{ 2005 predicate(n->as_Vector()->length() == 4); 2006 match(Set dst (ReplicateS src)); 2007 format %{ "movd $dst,$src\n\t" 2008 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 2009 ins_encode %{ 2010 __ movdl($dst$$XMMRegister, $src$$Register); 2011 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2012 %} 2013 ins_pipe( fpu_reg_reg ); 2014 %} 2015 2016 instruct Repl8S(vecX dst, rRegI src) %{ 2017 predicate(n->as_Vector()->length() == 8); 2018 match(Set dst (ReplicateS src)); 2019 format %{ "movd $dst,$src\n\t" 2020 "pshuflw $dst,$dst,0x00\n\t" 2021 "punpcklqdq $dst,$dst\t! replicate8S" %} 2022 ins_encode %{ 2023 __ movdl($dst$$XMMRegister, $src$$Register); 2024 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2025 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2026 %} 2027 ins_pipe( pipe_slow ); 2028 %} 2029 2030 instruct Repl16S(vecY dst, rRegI src) %{ 2031 predicate(n->as_Vector()->length() == 16); 2032 match(Set dst (ReplicateS src)); 2033 format %{ "movd $dst,$src\n\t" 2034 "pshuflw $dst,$dst,0x00\n\t" 2035 "punpcklqdq $dst,$dst\n\t" 2036 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 2037 ins_encode %{ 2038 __ movdl($dst$$XMMRegister, $src$$Register); 2039 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2040 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2041 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2042 %} 2043 ins_pipe( pipe_slow ); 2044 %} 2045 2046 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 2047 instruct Repl2S_imm(vecS dst, immI con) %{ 2048 predicate(n->as_Vector()->length() == 2); 2049 match(Set dst (ReplicateS con)); 2050 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 2051 ins_encode %{ 2052 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 2053 %} 2054 ins_pipe( fpu_reg_reg ); 2055 %} 2056 2057 instruct Repl4S_imm(vecD dst, immI con) %{ 2058 predicate(n->as_Vector()->length() == 4); 2059 match(Set dst (ReplicateS con)); 2060 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 2061 ins_encode %{ 2062 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2063 %} 2064 ins_pipe( fpu_reg_reg ); 2065 %} 2066 2067 instruct Repl8S_imm(vecX dst, immI con) %{ 2068 predicate(n->as_Vector()->length() == 8); 2069 match(Set dst (ReplicateS con)); 2070 format %{ "movq $dst,[$constantaddress]\n\t" 2071 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 2072 ins_encode %{ 2073 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2074 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2075 %} 2076 ins_pipe( pipe_slow ); 2077 %} 2078 2079 instruct Repl16S_imm(vecY dst, immI con) %{ 2080 predicate(n->as_Vector()->length() == 16); 2081 match(Set dst (ReplicateS con)); 2082 format %{ "movq $dst,[$constantaddress]\n\t" 2083 "punpcklqdq $dst,$dst\n\t" 2084 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} 2085 ins_encode %{ 2086 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2087 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2088 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2089 %} 2090 ins_pipe( pipe_slow ); 2091 %} 2092 2093 // Replicate char/short (2 byte) scalar zero to be vector 2094 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 2095 predicate(n->as_Vector()->length() == 2); 2096 match(Set dst (ReplicateS zero)); 2097 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 2098 ins_encode %{ 2099 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2100 %} 2101 ins_pipe( fpu_reg_reg ); 2102 %} 2103 2104 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 2105 predicate(n->as_Vector()->length() == 4); 2106 match(Set dst (ReplicateS zero)); 2107 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 2108 ins_encode %{ 2109 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2110 %} 2111 ins_pipe( fpu_reg_reg ); 2112 %} 2113 2114 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 2115 predicate(n->as_Vector()->length() == 8); 2116 match(Set dst (ReplicateS zero)); 2117 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 2118 ins_encode %{ 2119 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2120 %} 2121 ins_pipe( fpu_reg_reg ); 2122 %} 2123 2124 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 2125 predicate(n->as_Vector()->length() == 16); 2126 match(Set dst (ReplicateS zero)); 2127 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 2128 ins_encode %{ 2129 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2130 bool vector256 = true; 2131 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2132 %} 2133 ins_pipe( fpu_reg_reg ); 2134 %} 2135 2136 // Replicate integer (4 byte) scalar to be vector 2137 instruct Repl2I(vecD dst, rRegI src) %{ 2138 predicate(n->as_Vector()->length() == 2); 2139 match(Set dst (ReplicateI src)); 2140 format %{ "movd $dst,$src\n\t" 2141 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2142 ins_encode %{ 2143 __ movdl($dst$$XMMRegister, $src$$Register); 2144 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2145 %} 2146 ins_pipe( fpu_reg_reg ); 2147 %} 2148 2149 instruct Repl4I(vecX dst, rRegI src) %{ 2150 predicate(n->as_Vector()->length() == 4); 2151 match(Set dst (ReplicateI src)); 2152 format %{ "movd $dst,$src\n\t" 2153 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2154 ins_encode %{ 2155 __ movdl($dst$$XMMRegister, $src$$Register); 2156 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2157 %} 2158 ins_pipe( pipe_slow ); 2159 %} 2160 2161 instruct Repl8I(vecY dst, rRegI src) %{ 2162 predicate(n->as_Vector()->length() == 8); 2163 match(Set dst (ReplicateI src)); 2164 format %{ "movd $dst,$src\n\t" 2165 "pshufd $dst,$dst,0x00\n\t" 2166 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2167 ins_encode %{ 2168 __ movdl($dst$$XMMRegister, $src$$Register); 2169 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2170 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2171 %} 2172 ins_pipe( pipe_slow ); 2173 %} 2174 2175 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2176 instruct Repl2I_imm(vecD dst, immI con) %{ 2177 predicate(n->as_Vector()->length() == 2); 2178 match(Set dst (ReplicateI con)); 2179 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 2180 ins_encode %{ 2181 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2182 %} 2183 ins_pipe( fpu_reg_reg ); 2184 %} 2185 2186 instruct Repl4I_imm(vecX dst, immI con) %{ 2187 predicate(n->as_Vector()->length() == 4); 2188 match(Set dst (ReplicateI con)); 2189 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2190 "punpcklqdq $dst,$dst" %} 2191 ins_encode %{ 2192 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2193 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2194 %} 2195 ins_pipe( pipe_slow ); 2196 %} 2197 2198 instruct Repl8I_imm(vecY dst, immI con) %{ 2199 predicate(n->as_Vector()->length() == 8); 2200 match(Set dst (ReplicateI con)); 2201 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2202 "punpcklqdq $dst,$dst\n\t" 2203 "vinserti128h $dst,$dst,$dst" %} 2204 ins_encode %{ 2205 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2206 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2207 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2208 %} 2209 ins_pipe( pipe_slow ); 2210 %} 2211 2212 // Integer could be loaded into xmm register directly from memory. 2213 instruct Repl2I_mem(vecD dst, memory mem) %{ 2214 predicate(n->as_Vector()->length() == 2); 2215 match(Set dst (ReplicateI (LoadI mem))); 2216 format %{ "movd $dst,$mem\n\t" 2217 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2218 ins_encode %{ 2219 __ movdl($dst$$XMMRegister, $mem$$Address); 2220 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2221 %} 2222 ins_pipe( fpu_reg_reg ); 2223 %} 2224 2225 instruct Repl4I_mem(vecX dst, memory mem) %{ 2226 predicate(n->as_Vector()->length() == 4); 2227 match(Set dst (ReplicateI (LoadI mem))); 2228 format %{ "movd $dst,$mem\n\t" 2229 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2230 ins_encode %{ 2231 __ movdl($dst$$XMMRegister, $mem$$Address); 2232 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2233 %} 2234 ins_pipe( pipe_slow ); 2235 %} 2236 2237 instruct Repl8I_mem(vecY dst, memory mem) %{ 2238 predicate(n->as_Vector()->length() == 8); 2239 match(Set dst (ReplicateI (LoadI mem))); 2240 format %{ "movd $dst,$mem\n\t" 2241 "pshufd $dst,$dst,0x00\n\t" 2242 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2243 ins_encode %{ 2244 __ movdl($dst$$XMMRegister, $mem$$Address); 2245 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2246 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2247 %} 2248 ins_pipe( pipe_slow ); 2249 %} 2250 2251 // Replicate integer (4 byte) scalar zero to be vector 2252 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 2253 predicate(n->as_Vector()->length() == 2); 2254 match(Set dst (ReplicateI zero)); 2255 format %{ "pxor $dst,$dst\t! replicate2I" %} 2256 ins_encode %{ 2257 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2258 %} 2259 ins_pipe( fpu_reg_reg ); 2260 %} 2261 2262 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 2263 predicate(n->as_Vector()->length() == 4); 2264 match(Set dst (ReplicateI zero)); 2265 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 2266 ins_encode %{ 2267 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2268 %} 2269 ins_pipe( fpu_reg_reg ); 2270 %} 2271 2272 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 2273 predicate(n->as_Vector()->length() == 8); 2274 match(Set dst (ReplicateI zero)); 2275 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 2276 ins_encode %{ 2277 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2278 bool vector256 = true; 2279 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2280 %} 2281 ins_pipe( fpu_reg_reg ); 2282 %} 2283 2284 // Replicate long (8 byte) scalar to be vector 2285 #ifdef _LP64 2286 instruct Repl2L(vecX dst, rRegL src) %{ 2287 predicate(n->as_Vector()->length() == 2); 2288 match(Set dst (ReplicateL src)); 2289 format %{ "movdq $dst,$src\n\t" 2290 "punpcklqdq $dst,$dst\t! replicate2L" %} 2291 ins_encode %{ 2292 __ movdq($dst$$XMMRegister, $src$$Register); 2293 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2294 %} 2295 ins_pipe( pipe_slow ); 2296 %} 2297 2298 instruct Repl4L(vecY dst, rRegL src) %{ 2299 predicate(n->as_Vector()->length() == 4); 2300 match(Set dst (ReplicateL src)); 2301 format %{ "movdq $dst,$src\n\t" 2302 "punpcklqdq $dst,$dst\n\t" 2303 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2304 ins_encode %{ 2305 __ movdq($dst$$XMMRegister, $src$$Register); 2306 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2307 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2308 %} 2309 ins_pipe( pipe_slow ); 2310 %} 2311 #else // _LP64 2312 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 2313 predicate(n->as_Vector()->length() == 2); 2314 match(Set dst (ReplicateL src)); 2315 effect(TEMP dst, USE src, TEMP tmp); 2316 format %{ "movdl $dst,$src.lo\n\t" 2317 "movdl $tmp,$src.hi\n\t" 2318 "punpckldq $dst,$tmp\n\t" 2319 "punpcklqdq $dst,$dst\t! replicate2L"%} 2320 ins_encode %{ 2321 __ movdl($dst$$XMMRegister, $src$$Register); 2322 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2323 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2324 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2325 %} 2326 ins_pipe( pipe_slow ); 2327 %} 2328 2329 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2330 predicate(n->as_Vector()->length() == 4); 2331 match(Set dst (ReplicateL src)); 2332 effect(TEMP dst, USE src, TEMP tmp); 2333 format %{ "movdl $dst,$src.lo\n\t" 2334 "movdl $tmp,$src.hi\n\t" 2335 "punpckldq $dst,$tmp\n\t" 2336 "punpcklqdq $dst,$dst\n\t" 2337 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2338 ins_encode %{ 2339 __ movdl($dst$$XMMRegister, $src$$Register); 2340 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2341 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2342 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2343 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2344 %} 2345 ins_pipe( pipe_slow ); 2346 %} 2347 #endif // _LP64 2348 2349 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2350 instruct Repl2L_imm(vecX dst, immL con) %{ 2351 predicate(n->as_Vector()->length() == 2); 2352 match(Set dst (ReplicateL con)); 2353 format %{ "movq $dst,[$constantaddress]\n\t" 2354 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 2355 ins_encode %{ 2356 __ movq($dst$$XMMRegister, $constantaddress($con)); 2357 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2358 %} 2359 ins_pipe( pipe_slow ); 2360 %} 2361 2362 instruct Repl4L_imm(vecY dst, immL con) %{ 2363 predicate(n->as_Vector()->length() == 4); 2364 match(Set dst (ReplicateL con)); 2365 format %{ "movq $dst,[$constantaddress]\n\t" 2366 "punpcklqdq $dst,$dst\n\t" 2367 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 2368 ins_encode %{ 2369 __ movq($dst$$XMMRegister, $constantaddress($con)); 2370 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2371 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2372 %} 2373 ins_pipe( pipe_slow ); 2374 %} 2375 2376 // Long could be loaded into xmm register directly from memory. 2377 instruct Repl2L_mem(vecX dst, memory mem) %{ 2378 predicate(n->as_Vector()->length() == 2); 2379 match(Set dst (ReplicateL (LoadL mem))); 2380 format %{ "movq $dst,$mem\n\t" 2381 "punpcklqdq $dst,$dst\t! replicate2L" %} 2382 ins_encode %{ 2383 __ movq($dst$$XMMRegister, $mem$$Address); 2384 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2385 %} 2386 ins_pipe( pipe_slow ); 2387 %} 2388 2389 instruct Repl4L_mem(vecY dst, memory mem) %{ 2390 predicate(n->as_Vector()->length() == 4); 2391 match(Set dst (ReplicateL (LoadL mem))); 2392 format %{ "movq $dst,$mem\n\t" 2393 "punpcklqdq $dst,$dst\n\t" 2394 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2395 ins_encode %{ 2396 __ movq($dst$$XMMRegister, $mem$$Address); 2397 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2398 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2399 %} 2400 ins_pipe( pipe_slow ); 2401 %} 2402 2403 // Replicate long (8 byte) scalar zero to be vector 2404 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 2405 predicate(n->as_Vector()->length() == 2); 2406 match(Set dst (ReplicateL zero)); 2407 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 2408 ins_encode %{ 2409 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2410 %} 2411 ins_pipe( fpu_reg_reg ); 2412 %} 2413 2414 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 2415 predicate(n->as_Vector()->length() == 4); 2416 match(Set dst (ReplicateL zero)); 2417 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 2418 ins_encode %{ 2419 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2420 bool vector256 = true; 2421 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2422 %} 2423 ins_pipe( fpu_reg_reg ); 2424 %} 2425 2426 // Replicate float (4 byte) scalar to be vector 2427 instruct Repl2F(vecD dst, regF src) %{ 2428 predicate(n->as_Vector()->length() == 2); 2429 match(Set dst (ReplicateF src)); 2430 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 2431 ins_encode %{ 2432 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2433 %} 2434 ins_pipe( fpu_reg_reg ); 2435 %} 2436 2437 instruct Repl4F(vecX dst, regF src) %{ 2438 predicate(n->as_Vector()->length() == 4); 2439 match(Set dst (ReplicateF src)); 2440 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 2441 ins_encode %{ 2442 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2443 %} 2444 ins_pipe( pipe_slow ); 2445 %} 2446 2447 instruct Repl8F(vecY dst, regF src) %{ 2448 predicate(n->as_Vector()->length() == 8); 2449 match(Set dst (ReplicateF src)); 2450 format %{ "pshufd $dst,$src,0x00\n\t" 2451 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 2452 ins_encode %{ 2453 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2454 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2455 %} 2456 ins_pipe( pipe_slow ); 2457 %} 2458 2459 // Replicate float (4 byte) scalar zero to be vector 2460 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 2461 predicate(n->as_Vector()->length() == 2); 2462 match(Set dst (ReplicateF zero)); 2463 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 2464 ins_encode %{ 2465 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2466 %} 2467 ins_pipe( fpu_reg_reg ); 2468 %} 2469 2470 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 2471 predicate(n->as_Vector()->length() == 4); 2472 match(Set dst (ReplicateF zero)); 2473 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 2474 ins_encode %{ 2475 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2476 %} 2477 ins_pipe( fpu_reg_reg ); 2478 %} 2479 2480 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 2481 predicate(n->as_Vector()->length() == 8); 2482 match(Set dst (ReplicateF zero)); 2483 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 2484 ins_encode %{ 2485 bool vector256 = true; 2486 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2487 %} 2488 ins_pipe( fpu_reg_reg ); 2489 %} 2490 2491 // Replicate double (8 bytes) scalar to be vector 2492 instruct Repl2D(vecX dst, regD src) %{ 2493 predicate(n->as_Vector()->length() == 2); 2494 match(Set dst (ReplicateD src)); 2495 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 2496 ins_encode %{ 2497 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2498 %} 2499 ins_pipe( pipe_slow ); 2500 %} 2501 2502 instruct Repl4D(vecY dst, regD src) %{ 2503 predicate(n->as_Vector()->length() == 4); 2504 match(Set dst (ReplicateD src)); 2505 format %{ "pshufd $dst,$src,0x44\n\t" 2506 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 2507 ins_encode %{ 2508 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2509 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2510 %} 2511 ins_pipe( pipe_slow ); 2512 %} 2513 2514 // Replicate double (8 byte) scalar zero to be vector 2515 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 2516 predicate(n->as_Vector()->length() == 2); 2517 match(Set dst (ReplicateD zero)); 2518 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 2519 ins_encode %{ 2520 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 2521 %} 2522 ins_pipe( fpu_reg_reg ); 2523 %} 2524 2525 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 2526 predicate(n->as_Vector()->length() == 4); 2527 match(Set dst (ReplicateD zero)); 2528 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 2529 ins_encode %{ 2530 bool vector256 = true; 2531 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2532 %} 2533 ins_pipe( fpu_reg_reg ); 2534 %} 2535 2536 // ====================REDUCTION ARITHMETIC======================================= 2537 2538 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 2539 predicate(UseSSE > 2 && UseAVX == 0); 2540 match(Set dst (AddReductionVI src1 src2)); 2541 effect(TEMP tmp2, TEMP tmp); 2542 format %{ "movdqu $tmp2,$src2\n\t" 2543 "phaddd $tmp2,$tmp2\n\t" 2544 "movd $tmp,$src1\n\t" 2545 "paddd $tmp,$tmp2\n\t" 2546 "movd $dst,$tmp\t! add reduction2I" %} 2547 ins_encode %{ 2548 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 2549 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 2550 __ movdl($tmp$$XMMRegister, $src1$$Register); 2551 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 2552 __ movdl($dst$$Register, $tmp$$XMMRegister); 2553 %} 2554 ins_pipe( pipe_slow ); 2555 %} 2556 2557 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 2558 predicate(UseAVX > 0); 2559 match(Set dst (AddReductionVI src1 src2)); 2560 effect(TEMP tmp, TEMP tmp2); 2561 format %{ "vphaddd $tmp,$src2,$src2\n\t" 2562 "movd $tmp2,$src1\n\t" 2563 "vpaddd $tmp2,$tmp2,$tmp\n\t" 2564 "movd $dst,$tmp2\t! add reduction2I" %} 2565 ins_encode %{ 2566 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false); 2567 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2568 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); 2569 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2570 %} 2571 ins_pipe( pipe_slow ); 2572 %} 2573 2574 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 2575 predicate(UseSSE > 2 && UseAVX == 0); 2576 match(Set dst (AddReductionVI src1 src2)); 2577 effect(TEMP tmp2, TEMP tmp); 2578 format %{ "movdqu $tmp2,$src2\n\t" 2579 "phaddd $tmp2,$tmp2\n\t" 2580 "phaddd $tmp2,$tmp2\n\t" 2581 "movd $tmp,$src1\n\t" 2582 "paddd $tmp,$tmp2\n\t" 2583 "movd $dst,$tmp\t! add reduction4I" %} 2584 ins_encode %{ 2585 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 2586 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 2587 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 2588 __ movdl($tmp$$XMMRegister, $src1$$Register); 2589 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 2590 __ movdl($dst$$Register, $tmp$$XMMRegister); 2591 %} 2592 ins_pipe( pipe_slow ); 2593 %} 2594 2595 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 2596 predicate(UseAVX > 0); 2597 match(Set dst (AddReductionVI src1 src2)); 2598 effect(TEMP tmp, TEMP tmp2); 2599 format %{ "vphaddd $tmp,$src2,$src2\n\t" 2600 "vphaddd $tmp,$tmp,$tmp2\n\t" 2601 "movd $tmp2,$src1\n\t" 2602 "vpaddd $tmp2,$tmp2,$tmp\n\t" 2603 "movd $dst,$tmp2\t! add reduction4I" %} 2604 ins_encode %{ 2605 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false); 2606 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2607 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2608 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); 2609 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2610 %} 2611 ins_pipe( pipe_slow ); 2612 %} 2613 2614 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 2615 predicate(UseAVX > 0); 2616 match(Set dst (AddReductionVI src1 src2)); 2617 effect(TEMP tmp, TEMP tmp2); 2618 format %{ "vphaddd $tmp,$src2,$src2\n\t" 2619 "vphaddd $tmp,$tmp,$tmp2\n\t" 2620 "vextractf128 $tmp2,$tmp\n\t" 2621 "vpaddd $tmp,$tmp,$tmp2\n\t" 2622 "movd $tmp2,$src1\n\t" 2623 "vpaddd $tmp2,$tmp2,$tmp\n\t" 2624 "movd $dst,$tmp2\t! add reduction8I" %} 2625 ins_encode %{ 2626 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true); 2627 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true); 2628 __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 2629 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2630 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2631 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); 2632 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2633 %} 2634 ins_pipe( pipe_slow ); 2635 %} 2636 2637 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 2638 predicate(UseSSE >= 1 && UseAVX == 0); 2639 match(Set dst (AddReductionVF src1 src2)); 2640 effect(TEMP tmp, TEMP tmp2); 2641 format %{ "movdqu $tmp,$src1\n\t" 2642 "addss $tmp,$src2\n\t" 2643 "pshufd $tmp2,$src2,0x01\n\t" 2644 "addss $tmp,$tmp2\n\t" 2645 "movdqu $dst,$tmp\t! add reduction2F" %} 2646 ins_encode %{ 2647 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2648 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 2649 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 2650 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2651 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 2652 %} 2653 ins_pipe( pipe_slow ); 2654 %} 2655 2656 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 2657 predicate(UseAVX > 0); 2658 match(Set dst (AddReductionVF src1 src2)); 2659 effect(TEMP tmp2, TEMP tmp); 2660 format %{ "vaddss $tmp2,$src1,$src2\n\t" 2661 "pshufd $tmp,$src2,0x01\n\t" 2662 "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} 2663 ins_encode %{ 2664 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2665 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 2666 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2667 %} 2668 ins_pipe( pipe_slow ); 2669 %} 2670 2671 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 2672 predicate(UseSSE >= 1 && UseAVX == 0); 2673 match(Set dst (AddReductionVF src1 src2)); 2674 effect(TEMP tmp, TEMP tmp2); 2675 format %{ "movdqu $tmp,$src1\n\t" 2676 "addss $tmp,$src2\n\t" 2677 "pshufd $tmp2,$src2,0x01\n\t" 2678 "addss $tmp,$tmp2\n\t" 2679 "pshufd $tmp2,$src2,0x02\n\t" 2680 "addss $tmp,$tmp2\n\t" 2681 "pshufd $tmp2,$src2,0x03\n\t" 2682 "addss $tmp,$tmp2\n\t" 2683 "movdqu $dst,$tmp\t! add reduction4F" %} 2684 ins_encode %{ 2685 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2686 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 2687 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 2688 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2689 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 2690 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2691 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 2692 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2693 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 2694 %} 2695 ins_pipe( pipe_slow ); 2696 %} 2697 2698 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 2699 predicate(UseAVX > 0); 2700 match(Set dst (AddReductionVF src1 src2)); 2701 effect(TEMP tmp, TEMP tmp2); 2702 format %{ "vaddss $tmp2,$src1,$src2\n\t" 2703 "pshufd $tmp,$src2,0x01\n\t" 2704 "vaddss $tmp2,$tmp2,$tmp\n\t" 2705 "pshufd $tmp,$src2,0x02\n\t" 2706 "vaddss $tmp2,$tmp2,$tmp\n\t" 2707 "pshufd $tmp,$src2,0x03\n\t" 2708 "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} 2709 ins_encode %{ 2710 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2711 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 2712 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2713 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 2714 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2715 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 2716 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2717 %} 2718 ins_pipe( pipe_slow ); 2719 %} 2720 2721 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 2722 predicate(UseAVX > 0); 2723 match(Set dst (AddReductionVF src1 src2)); 2724 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 2725 format %{ "vaddss $tmp2,$src1,$src2\n\t" 2726 "pshufd $tmp,$src2,0x01\n\t" 2727 "vaddss $tmp2,$tmp2,$tmp\n\t" 2728 "pshufd $tmp,$src2,0x02\n\t" 2729 "vaddss $tmp2,$tmp2,$tmp\n\t" 2730 "pshufd $tmp,$src2,0x03\n\t" 2731 "vaddss $tmp2,$tmp2,$tmp\n\t" 2732 "vextractf128 $tmp3,$src2\n\t" 2733 "vaddss $tmp2,$tmp2,$tmp3\n\t" 2734 "pshufd $tmp,$tmp3,0x01\n\t" 2735 "vaddss $tmp2,$tmp2,$tmp\n\t" 2736 "pshufd $tmp,$tmp3,0x02\n\t" 2737 "vaddss $tmp2,$tmp2,$tmp\n\t" 2738 "pshufd $tmp,$tmp3,0x03\n\t" 2739 "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %} 2740 ins_encode %{ 2741 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2742 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 2743 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2744 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 2745 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2746 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 2747 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2748 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 2749 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 2750 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 2751 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2752 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 2753 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2754 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 2755 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2756 %} 2757 ins_pipe( pipe_slow ); 2758 %} 2759 2760 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 2761 predicate(UseSSE >= 1 && UseAVX == 0); 2762 match(Set dst (AddReductionVD src1 src2)); 2763 effect(TEMP tmp, TEMP dst); 2764 format %{ "movdqu $tmp,$src1\n\t" 2765 "addsd $tmp,$src2\n\t" 2766 "pshufd $dst,$src2,0xE\n\t" 2767 "addsd $dst,$tmp\t! add reduction2D" %} 2768 ins_encode %{ 2769 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2770 __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); 2771 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 2772 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 2773 %} 2774 ins_pipe( pipe_slow ); 2775 %} 2776 2777 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 2778 predicate(UseAVX > 0); 2779 match(Set dst (AddReductionVD src1 src2)); 2780 effect(TEMP tmp, TEMP tmp2); 2781 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 2782 "pshufd $tmp,$src2,0xE\n\t" 2783 "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %} 2784 ins_encode %{ 2785 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2786 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 2787 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2788 %} 2789 ins_pipe( pipe_slow ); 2790 %} 2791 2792 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 2793 predicate(UseAVX > 0); 2794 match(Set dst (AddReductionVD src1 src2)); 2795 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 2796 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 2797 "pshufd $tmp,$src2,0xE\n\t" 2798 "vaddsd $tmp2,$tmp2,$tmp\n\t" 2799 "vextractf128 $tmp3,$src2\n\t" 2800 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 2801 "pshufd $tmp,$tmp3,0xE\n\t" 2802 "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} 2803 ins_encode %{ 2804 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2805 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 2806 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2807 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 2808 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 2809 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 2810 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2811 %} 2812 ins_pipe( pipe_slow ); 2813 %} 2814 2815 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 2816 predicate(UseSSE > 3 && UseAVX == 0); 2817 match(Set dst (MulReductionVI src1 src2)); 2818 effect(TEMP tmp, TEMP tmp2); 2819 format %{ "pshufd $tmp2,$src2,0x1\n\t" 2820 "pmulld $tmp2,$src2\n\t" 2821 "movd $tmp,$src1\n\t" 2822 "pmulld $tmp2,$tmp\n\t" 2823 "movd $dst,$tmp2\t! mul reduction2I" %} 2824 ins_encode %{ 2825 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 2826 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 2827 __ movdl($tmp$$XMMRegister, $src1$$Register); 2828 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 2829 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2830 %} 2831 ins_pipe( pipe_slow ); 2832 %} 2833 2834 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 2835 predicate(UseAVX > 0); 2836 match(Set dst (MulReductionVI src1 src2)); 2837 effect(TEMP tmp, TEMP tmp2); 2838 format %{ "pshufd $tmp2,$src2,0x1\n\t" 2839 "vpmulld $tmp,$src2,$tmp2\n\t" 2840 "movd $tmp2,$src1\n\t" 2841 "vpmulld $tmp2,$tmp,$tmp2\n\t" 2842 "movd $dst,$tmp2\t! mul reduction2I" %} 2843 ins_encode %{ 2844 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 2845 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false); 2846 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2847 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2848 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2849 %} 2850 ins_pipe( pipe_slow ); 2851 %} 2852 2853 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 2854 predicate(UseSSE > 3 && UseAVX == 0); 2855 match(Set dst (MulReductionVI src1 src2)); 2856 effect(TEMP tmp, TEMP tmp2); 2857 format %{ "pshufd $tmp2,$src2,0xE\n\t" 2858 "pmulld $tmp2,$src2\n\t" 2859 "pshufd $tmp,$tmp2,0x1\n\t" 2860 "pmulld $tmp2,$tmp\n\t" 2861 "movd $tmp,$src1\n\t" 2862 "pmulld $tmp2,$tmp\n\t" 2863 "movd $dst,$tmp2\t! mul reduction4I" %} 2864 ins_encode %{ 2865 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 2866 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 2867 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 2868 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 2869 __ movdl($tmp$$XMMRegister, $src1$$Register); 2870 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 2871 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2872 %} 2873 ins_pipe( pipe_slow ); 2874 %} 2875 2876 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 2877 predicate(UseAVX > 0); 2878 match(Set dst (MulReductionVI src1 src2)); 2879 effect(TEMP tmp, TEMP tmp2); 2880 format %{ "pshufd $tmp2,$src2,0xE\n\t" 2881 "vpmulld $tmp,$src2,$tmp2\n\t" 2882 "pshufd $tmp2,$tmp,0x1\n\t" 2883 "vpmulld $tmp,$tmp,$tmp2\n\t" 2884 "movd $tmp2,$src1\n\t" 2885 "vpmulld $tmp2,$tmp,$tmp2\n\t" 2886 "movd $dst,$tmp2\t! mul reduction4I" %} 2887 ins_encode %{ 2888 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 2889 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false); 2890 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 2891 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2892 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2893 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2894 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2895 %} 2896 ins_pipe( pipe_slow ); 2897 %} 2898 2899 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 2900 predicate(UseAVX > 0); 2901 match(Set dst (MulReductionVI src1 src2)); 2902 effect(TEMP tmp, TEMP tmp2); 2903 format %{ "vextractf128 $tmp,$src2\n\t" 2904 "vpmulld $tmp,$tmp,$src2\n\t" 2905 "pshufd $tmp2,$tmp,0xE\n\t" 2906 "vpmulld $tmp,$tmp,$tmp2\n\t" 2907 "pshufd $tmp2,$tmp,0x1\n\t" 2908 "vpmulld $tmp,$tmp,$tmp2\n\t" 2909 "movd $tmp2,$src1\n\t" 2910 "vpmulld $tmp2,$tmp,$tmp2\n\t" 2911 "movd $dst,$tmp2\t! mul reduction8I" %} 2912 ins_encode %{ 2913 __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister); 2914 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false); 2915 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 2916 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2917 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 2918 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2919 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2920 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2921 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2922 %} 2923 ins_pipe( pipe_slow ); 2924 %} 2925 2926 instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 2927 predicate(UseSSE >= 1 && UseAVX == 0); 2928 match(Set dst (MulReductionVF src1 src2)); 2929 effect(TEMP tmp, TEMP tmp2); 2930 format %{ "movdqu $tmp,$src1\n\t" 2931 "mulss $tmp,$src2\n\t" 2932 "pshufd $tmp2,$src2,0x01\n\t" 2933 "mulss $tmp,$tmp2\n\t" 2934 "movdqu $dst,$tmp\t! add reduction2F" %} 2935 ins_encode %{ 2936 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2937 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 2938 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 2939 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2940 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 2941 %} 2942 ins_pipe( pipe_slow ); 2943 %} 2944 2945 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 2946 predicate(UseAVX > 0); 2947 match(Set dst (MulReductionVF src1 src2)); 2948 effect(TEMP tmp, TEMP tmp2); 2949 format %{ "vmulss $tmp2,$src1,$src2\n\t" 2950 "pshufd $tmp,$src2,0x01\n\t" 2951 "vmulss $dst,$tmp2,$tmp\t! add reduction2F" %} 2952 ins_encode %{ 2953 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2954 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 2955 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2956 %} 2957 ins_pipe( pipe_slow ); 2958 %} 2959 2960 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 2961 predicate(UseSSE >= 1 && UseAVX == 0); 2962 match(Set dst (MulReductionVF src1 src2)); 2963 effect(TEMP tmp, TEMP tmp2); 2964 format %{ "movdqu $tmp,$src1\n\t" 2965 "mulss $tmp,$src2\n\t" 2966 "pshufd $tmp2,$src2,0x01\n\t" 2967 "mulss $tmp,$tmp2\n\t" 2968 "pshufd $tmp2,$src2,0x02\n\t" 2969 "mulss $tmp,$tmp2\n\t" 2970 "pshufd $tmp2,$src2,0x03\n\t" 2971 "mulss $tmp,$tmp2\n\t" 2972 "movdqu $dst,$tmp\t! add reduction4F" %} 2973 ins_encode %{ 2974 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2975 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 2976 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 2977 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2978 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 2979 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2980 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 2981 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2982 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 2983 %} 2984 ins_pipe( pipe_slow ); 2985 %} 2986 2987 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 2988 predicate(UseAVX > 0); 2989 match(Set dst (MulReductionVF src1 src2)); 2990 effect(TEMP tmp, TEMP tmp2); 2991 format %{ "vmulss $tmp2,$src1,$src2\n\t" 2992 "pshufd $tmp,$src2,0x01\n\t" 2993 "vmulss $tmp2,$tmp2,$tmp\n\t" 2994 "pshufd $tmp,$src2,0x02\n\t" 2995 "vmulss $tmp2,$tmp2,$tmp\n\t" 2996 "pshufd $tmp,$src2,0x03\n\t" 2997 "vmulss $dst,$tmp2,$tmp\t! add reduction4F" %} 2998 ins_encode %{ 2999 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3000 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 3001 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3002 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 3003 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3004 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 3005 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3006 %} 3007 ins_pipe( pipe_slow ); 3008 %} 3009 3010 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 3011 predicate(UseAVX > 0); 3012 match(Set dst (MulReductionVF src1 src2)); 3013 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 3014 format %{ "vmulss $tmp2,$src1,$src2\n\t" 3015 "pshufd $tmp,$src2,0x01\n\t" 3016 "vmulss $tmp2,$tmp2,$tmp\n\t" 3017 "pshufd $tmp,$src2,0x02\n\t" 3018 "vmulss $tmp2,$tmp2,$tmp\n\t" 3019 "pshufd $tmp,$src2,0x03\n\t" 3020 "vmulss $tmp2,$tmp2,$tmp\n\t" 3021 "vextractf128 $tmp3,$src2\n\t" 3022 "vmulss $tmp2,$tmp2,$tmp3\n\t" 3023 "pshufd $tmp,$tmp3,0x01\n\t" 3024 "vmulss $tmp2,$tmp2,$tmp\n\t" 3025 "pshufd $tmp,$tmp3,0x02\n\t" 3026 "vmulss $tmp2,$tmp2,$tmp\n\t" 3027 "pshufd $tmp,$tmp3,0x03\n\t" 3028 "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %} 3029 ins_encode %{ 3030 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3031 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 3032 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3033 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 3034 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3035 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 3036 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3037 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 3038 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 3039 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 3040 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3041 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 3042 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3043 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 3044 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3045 %} 3046 ins_pipe( pipe_slow ); 3047 %} 3048 3049 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 3050 predicate(UseSSE >= 1 && UseAVX == 0); 3051 match(Set dst (MulReductionVD src1 src2)); 3052 effect(TEMP tmp, TEMP dst); 3053 format %{ "movdqu $tmp,$src1\n\t" 3054 "mulsd $tmp,$src2\n\t" 3055 "pshufd $dst,$src2,0xE\n\t" 3056 "mulsd $dst,$tmp\t! add reduction2D" %} 3057 ins_encode %{ 3058 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 3059 __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); 3060 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 3061 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 3062 %} 3063 ins_pipe( pipe_slow ); 3064 %} 3065 3066 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 3067 predicate(UseAVX > 0); 3068 match(Set dst (MulReductionVD src1 src2)); 3069 effect(TEMP tmp, TEMP tmp2); 3070 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 3071 "pshufd $tmp,$src2,0xE\n\t" 3072 "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %} 3073 ins_encode %{ 3074 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3075 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 3076 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3077 %} 3078 ins_pipe( pipe_slow ); 3079 %} 3080 3081 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 3082 predicate(UseAVX > 0); 3083 match(Set dst (MulReductionVD src1 src2)); 3084 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 3085 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 3086 "pshufd $tmp,$src2,0xE\n\t" 3087 "vmulsd $tmp2,$tmp2,$tmp\n\t" 3088 "vextractf128 $tmp3,$src2\n\t" 3089 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 3090 "pshufd $tmp,$tmp3,0xE\n\t" 3091 "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} 3092 ins_encode %{ 3093 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3094 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 3095 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3096 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 3097 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 3098 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 3099 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3100 %} 3101 ins_pipe( pipe_slow ); 3102 %} 3103 3104 // ====================VECTOR ARITHMETIC======================================= 3105 3106 // --------------------------------- ADD -------------------------------------- 3107 3108 // Bytes vector add 3109 instruct vadd4B(vecS dst, vecS src) %{ 3110 predicate(n->as_Vector()->length() == 4); 3111 match(Set dst (AddVB dst src)); 3112 format %{ "paddb $dst,$src\t! add packed4B" %} 3113 ins_encode %{ 3114 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3115 %} 3116 ins_pipe( pipe_slow ); 3117 %} 3118 3119 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 3120 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3121 match(Set dst (AddVB src1 src2)); 3122 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 3123 ins_encode %{ 3124 bool vector256 = false; 3125 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3126 %} 3127 ins_pipe( pipe_slow ); 3128 %} 3129 3130 instruct vadd8B(vecD dst, vecD src) %{ 3131 predicate(n->as_Vector()->length() == 8); 3132 match(Set dst (AddVB dst src)); 3133 format %{ "paddb $dst,$src\t! add packed8B" %} 3134 ins_encode %{ 3135 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3136 %} 3137 ins_pipe( pipe_slow ); 3138 %} 3139 3140 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 3141 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3142 match(Set dst (AddVB src1 src2)); 3143 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 3144 ins_encode %{ 3145 bool vector256 = false; 3146 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3147 %} 3148 ins_pipe( pipe_slow ); 3149 %} 3150 3151 instruct vadd16B(vecX dst, vecX src) %{ 3152 predicate(n->as_Vector()->length() == 16); 3153 match(Set dst (AddVB dst src)); 3154 format %{ "paddb $dst,$src\t! add packed16B" %} 3155 ins_encode %{ 3156 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3157 %} 3158 ins_pipe( pipe_slow ); 3159 %} 3160 3161 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 3162 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 3163 match(Set dst (AddVB src1 src2)); 3164 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 3165 ins_encode %{ 3166 bool vector256 = false; 3167 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3168 %} 3169 ins_pipe( pipe_slow ); 3170 %} 3171 3172 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 3173 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 3174 match(Set dst (AddVB src (LoadVector mem))); 3175 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 3176 ins_encode %{ 3177 bool vector256 = false; 3178 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3179 %} 3180 ins_pipe( pipe_slow ); 3181 %} 3182 3183 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 3184 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 3185 match(Set dst (AddVB src1 src2)); 3186 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 3187 ins_encode %{ 3188 bool vector256 = true; 3189 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3190 %} 3191 ins_pipe( pipe_slow ); 3192 %} 3193 3194 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 3195 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 3196 match(Set dst (AddVB src (LoadVector mem))); 3197 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 3198 ins_encode %{ 3199 bool vector256 = true; 3200 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3201 %} 3202 ins_pipe( pipe_slow ); 3203 %} 3204 3205 // Shorts/Chars vector add 3206 instruct vadd2S(vecS dst, vecS src) %{ 3207 predicate(n->as_Vector()->length() == 2); 3208 match(Set dst (AddVS dst src)); 3209 format %{ "paddw $dst,$src\t! add packed2S" %} 3210 ins_encode %{ 3211 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 3212 %} 3213 ins_pipe( pipe_slow ); 3214 %} 3215 3216 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 3217 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3218 match(Set dst (AddVS src1 src2)); 3219 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 3220 ins_encode %{ 3221 bool vector256 = false; 3222 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3223 %} 3224 ins_pipe( pipe_slow ); 3225 %} 3226 3227 instruct vadd4S(vecD dst, vecD src) %{ 3228 predicate(n->as_Vector()->length() == 4); 3229 match(Set dst (AddVS dst src)); 3230 format %{ "paddw $dst,$src\t! add packed4S" %} 3231 ins_encode %{ 3232 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 3233 %} 3234 ins_pipe( pipe_slow ); 3235 %} 3236 3237 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 3238 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3239 match(Set dst (AddVS src1 src2)); 3240 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 3241 ins_encode %{ 3242 bool vector256 = false; 3243 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3244 %} 3245 ins_pipe( pipe_slow ); 3246 %} 3247 3248 instruct vadd8S(vecX dst, vecX src) %{ 3249 predicate(n->as_Vector()->length() == 8); 3250 match(Set dst (AddVS dst src)); 3251 format %{ "paddw $dst,$src\t! add packed8S" %} 3252 ins_encode %{ 3253 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 3254 %} 3255 ins_pipe( pipe_slow ); 3256 %} 3257 3258 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 3259 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3260 match(Set dst (AddVS src1 src2)); 3261 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 3262 ins_encode %{ 3263 bool vector256 = false; 3264 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3265 %} 3266 ins_pipe( pipe_slow ); 3267 %} 3268 3269 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 3270 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3271 match(Set dst (AddVS src (LoadVector mem))); 3272 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 3273 ins_encode %{ 3274 bool vector256 = false; 3275 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3276 %} 3277 ins_pipe( pipe_slow ); 3278 %} 3279 3280 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 3281 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3282 match(Set dst (AddVS src1 src2)); 3283 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 3284 ins_encode %{ 3285 bool vector256 = true; 3286 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3287 %} 3288 ins_pipe( pipe_slow ); 3289 %} 3290 3291 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 3292 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3293 match(Set dst (AddVS src (LoadVector mem))); 3294 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 3295 ins_encode %{ 3296 bool vector256 = true; 3297 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3298 %} 3299 ins_pipe( pipe_slow ); 3300 %} 3301 3302 // Integers vector add 3303 instruct vadd2I(vecD dst, vecD src) %{ 3304 predicate(n->as_Vector()->length() == 2); 3305 match(Set dst (AddVI dst src)); 3306 format %{ "paddd $dst,$src\t! add packed2I" %} 3307 ins_encode %{ 3308 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 3309 %} 3310 ins_pipe( pipe_slow ); 3311 %} 3312 3313 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 3314 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3315 match(Set dst (AddVI src1 src2)); 3316 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 3317 ins_encode %{ 3318 bool vector256 = false; 3319 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3320 %} 3321 ins_pipe( pipe_slow ); 3322 %} 3323 3324 instruct vadd4I(vecX dst, vecX src) %{ 3325 predicate(n->as_Vector()->length() == 4); 3326 match(Set dst (AddVI dst src)); 3327 format %{ "paddd $dst,$src\t! add packed4I" %} 3328 ins_encode %{ 3329 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 3330 %} 3331 ins_pipe( pipe_slow ); 3332 %} 3333 3334 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 3335 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3336 match(Set dst (AddVI src1 src2)); 3337 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 3338 ins_encode %{ 3339 bool vector256 = false; 3340 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3341 %} 3342 ins_pipe( pipe_slow ); 3343 %} 3344 3345 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 3346 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3347 match(Set dst (AddVI src (LoadVector mem))); 3348 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 3349 ins_encode %{ 3350 bool vector256 = false; 3351 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3352 %} 3353 ins_pipe( pipe_slow ); 3354 %} 3355 3356 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 3357 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3358 match(Set dst (AddVI src1 src2)); 3359 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 3360 ins_encode %{ 3361 bool vector256 = true; 3362 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3363 %} 3364 ins_pipe( pipe_slow ); 3365 %} 3366 3367 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 3368 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3369 match(Set dst (AddVI src (LoadVector mem))); 3370 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 3371 ins_encode %{ 3372 bool vector256 = true; 3373 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3374 %} 3375 ins_pipe( pipe_slow ); 3376 %} 3377 3378 // Longs vector add 3379 instruct vadd2L(vecX dst, vecX src) %{ 3380 predicate(n->as_Vector()->length() == 2); 3381 match(Set dst (AddVL dst src)); 3382 format %{ "paddq $dst,$src\t! add packed2L" %} 3383 ins_encode %{ 3384 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 3385 %} 3386 ins_pipe( pipe_slow ); 3387 %} 3388 3389 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 3390 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3391 match(Set dst (AddVL src1 src2)); 3392 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 3393 ins_encode %{ 3394 bool vector256 = false; 3395 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3396 %} 3397 ins_pipe( pipe_slow ); 3398 %} 3399 3400 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 3401 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3402 match(Set dst (AddVL src (LoadVector mem))); 3403 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 3404 ins_encode %{ 3405 bool vector256 = false; 3406 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3407 %} 3408 ins_pipe( pipe_slow ); 3409 %} 3410 3411 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 3412 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3413 match(Set dst (AddVL src1 src2)); 3414 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 3415 ins_encode %{ 3416 bool vector256 = true; 3417 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3418 %} 3419 ins_pipe( pipe_slow ); 3420 %} 3421 3422 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 3423 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3424 match(Set dst (AddVL src (LoadVector mem))); 3425 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 3426 ins_encode %{ 3427 bool vector256 = true; 3428 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3429 %} 3430 ins_pipe( pipe_slow ); 3431 %} 3432 3433 // Floats vector add 3434 instruct vadd2F(vecD dst, vecD src) %{ 3435 predicate(n->as_Vector()->length() == 2); 3436 match(Set dst (AddVF dst src)); 3437 format %{ "addps $dst,$src\t! add packed2F" %} 3438 ins_encode %{ 3439 __ addps($dst$$XMMRegister, $src$$XMMRegister); 3440 %} 3441 ins_pipe( pipe_slow ); 3442 %} 3443 3444 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 3445 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3446 match(Set dst (AddVF src1 src2)); 3447 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 3448 ins_encode %{ 3449 bool vector256 = false; 3450 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3451 %} 3452 ins_pipe( pipe_slow ); 3453 %} 3454 3455 instruct vadd4F(vecX dst, vecX src) %{ 3456 predicate(n->as_Vector()->length() == 4); 3457 match(Set dst (AddVF dst src)); 3458 format %{ "addps $dst,$src\t! add packed4F" %} 3459 ins_encode %{ 3460 __ addps($dst$$XMMRegister, $src$$XMMRegister); 3461 %} 3462 ins_pipe( pipe_slow ); 3463 %} 3464 3465 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 3466 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3467 match(Set dst (AddVF src1 src2)); 3468 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 3469 ins_encode %{ 3470 bool vector256 = false; 3471 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3472 %} 3473 ins_pipe( pipe_slow ); 3474 %} 3475 3476 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 3477 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3478 match(Set dst (AddVF src (LoadVector mem))); 3479 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 3480 ins_encode %{ 3481 bool vector256 = false; 3482 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3483 %} 3484 ins_pipe( pipe_slow ); 3485 %} 3486 3487 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 3488 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3489 match(Set dst (AddVF src1 src2)); 3490 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 3491 ins_encode %{ 3492 bool vector256 = true; 3493 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3494 %} 3495 ins_pipe( pipe_slow ); 3496 %} 3497 3498 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 3499 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3500 match(Set dst (AddVF src (LoadVector mem))); 3501 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 3502 ins_encode %{ 3503 bool vector256 = true; 3504 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3505 %} 3506 ins_pipe( pipe_slow ); 3507 %} 3508 3509 // Doubles vector add 3510 instruct vadd2D(vecX dst, vecX src) %{ 3511 predicate(n->as_Vector()->length() == 2); 3512 match(Set dst (AddVD dst src)); 3513 format %{ "addpd $dst,$src\t! add packed2D" %} 3514 ins_encode %{ 3515 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 3516 %} 3517 ins_pipe( pipe_slow ); 3518 %} 3519 3520 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 3521 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3522 match(Set dst (AddVD src1 src2)); 3523 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 3524 ins_encode %{ 3525 bool vector256 = false; 3526 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3527 %} 3528 ins_pipe( pipe_slow ); 3529 %} 3530 3531 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 3532 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3533 match(Set dst (AddVD src (LoadVector mem))); 3534 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 3535 ins_encode %{ 3536 bool vector256 = false; 3537 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3538 %} 3539 ins_pipe( pipe_slow ); 3540 %} 3541 3542 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 3543 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3544 match(Set dst (AddVD src1 src2)); 3545 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 3546 ins_encode %{ 3547 bool vector256 = true; 3548 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3549 %} 3550 ins_pipe( pipe_slow ); 3551 %} 3552 3553 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 3554 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3555 match(Set dst (AddVD src (LoadVector mem))); 3556 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 3557 ins_encode %{ 3558 bool vector256 = true; 3559 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3560 %} 3561 ins_pipe( pipe_slow ); 3562 %} 3563 3564 // --------------------------------- SUB -------------------------------------- 3565 3566 // Bytes vector sub 3567 instruct vsub4B(vecS dst, vecS src) %{ 3568 predicate(n->as_Vector()->length() == 4); 3569 match(Set dst (SubVB dst src)); 3570 format %{ "psubb $dst,$src\t! sub packed4B" %} 3571 ins_encode %{ 3572 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 3573 %} 3574 ins_pipe( pipe_slow ); 3575 %} 3576 3577 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 3578 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3579 match(Set dst (SubVB src1 src2)); 3580 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 3581 ins_encode %{ 3582 bool vector256 = false; 3583 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3584 %} 3585 ins_pipe( pipe_slow ); 3586 %} 3587 3588 instruct vsub8B(vecD dst, vecD src) %{ 3589 predicate(n->as_Vector()->length() == 8); 3590 match(Set dst (SubVB dst src)); 3591 format %{ "psubb $dst,$src\t! sub packed8B" %} 3592 ins_encode %{ 3593 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 3594 %} 3595 ins_pipe( pipe_slow ); 3596 %} 3597 3598 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 3599 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3600 match(Set dst (SubVB src1 src2)); 3601 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 3602 ins_encode %{ 3603 bool vector256 = false; 3604 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3605 %} 3606 ins_pipe( pipe_slow ); 3607 %} 3608 3609 instruct vsub16B(vecX dst, vecX src) %{ 3610 predicate(n->as_Vector()->length() == 16); 3611 match(Set dst (SubVB dst src)); 3612 format %{ "psubb $dst,$src\t! sub packed16B" %} 3613 ins_encode %{ 3614 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 3615 %} 3616 ins_pipe( pipe_slow ); 3617 %} 3618 3619 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 3620 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 3621 match(Set dst (SubVB src1 src2)); 3622 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 3623 ins_encode %{ 3624 bool vector256 = false; 3625 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3626 %} 3627 ins_pipe( pipe_slow ); 3628 %} 3629 3630 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 3631 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 3632 match(Set dst (SubVB src (LoadVector mem))); 3633 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 3634 ins_encode %{ 3635 bool vector256 = false; 3636 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3637 %} 3638 ins_pipe( pipe_slow ); 3639 %} 3640 3641 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 3642 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 3643 match(Set dst (SubVB src1 src2)); 3644 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 3645 ins_encode %{ 3646 bool vector256 = true; 3647 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3648 %} 3649 ins_pipe( pipe_slow ); 3650 %} 3651 3652 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 3653 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 3654 match(Set dst (SubVB src (LoadVector mem))); 3655 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 3656 ins_encode %{ 3657 bool vector256 = true; 3658 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3659 %} 3660 ins_pipe( pipe_slow ); 3661 %} 3662 3663 // Shorts/Chars vector sub 3664 instruct vsub2S(vecS dst, vecS src) %{ 3665 predicate(n->as_Vector()->length() == 2); 3666 match(Set dst (SubVS dst src)); 3667 format %{ "psubw $dst,$src\t! sub packed2S" %} 3668 ins_encode %{ 3669 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 3670 %} 3671 ins_pipe( pipe_slow ); 3672 %} 3673 3674 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 3675 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3676 match(Set dst (SubVS src1 src2)); 3677 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 3678 ins_encode %{ 3679 bool vector256 = false; 3680 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3681 %} 3682 ins_pipe( pipe_slow ); 3683 %} 3684 3685 instruct vsub4S(vecD dst, vecD src) %{ 3686 predicate(n->as_Vector()->length() == 4); 3687 match(Set dst (SubVS dst src)); 3688 format %{ "psubw $dst,$src\t! sub packed4S" %} 3689 ins_encode %{ 3690 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 3691 %} 3692 ins_pipe( pipe_slow ); 3693 %} 3694 3695 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 3696 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3697 match(Set dst (SubVS src1 src2)); 3698 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 3699 ins_encode %{ 3700 bool vector256 = false; 3701 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3702 %} 3703 ins_pipe( pipe_slow ); 3704 %} 3705 3706 instruct vsub8S(vecX dst, vecX src) %{ 3707 predicate(n->as_Vector()->length() == 8); 3708 match(Set dst (SubVS dst src)); 3709 format %{ "psubw $dst,$src\t! sub packed8S" %} 3710 ins_encode %{ 3711 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 3712 %} 3713 ins_pipe( pipe_slow ); 3714 %} 3715 3716 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 3717 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3718 match(Set dst (SubVS src1 src2)); 3719 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 3720 ins_encode %{ 3721 bool vector256 = false; 3722 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3723 %} 3724 ins_pipe( pipe_slow ); 3725 %} 3726 3727 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 3728 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3729 match(Set dst (SubVS src (LoadVector mem))); 3730 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 3731 ins_encode %{ 3732 bool vector256 = false; 3733 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3734 %} 3735 ins_pipe( pipe_slow ); 3736 %} 3737 3738 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 3739 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3740 match(Set dst (SubVS src1 src2)); 3741 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 3742 ins_encode %{ 3743 bool vector256 = true; 3744 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3745 %} 3746 ins_pipe( pipe_slow ); 3747 %} 3748 3749 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 3750 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3751 match(Set dst (SubVS src (LoadVector mem))); 3752 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 3753 ins_encode %{ 3754 bool vector256 = true; 3755 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3756 %} 3757 ins_pipe( pipe_slow ); 3758 %} 3759 3760 // Integers vector sub 3761 instruct vsub2I(vecD dst, vecD src) %{ 3762 predicate(n->as_Vector()->length() == 2); 3763 match(Set dst (SubVI dst src)); 3764 format %{ "psubd $dst,$src\t! sub packed2I" %} 3765 ins_encode %{ 3766 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 3767 %} 3768 ins_pipe( pipe_slow ); 3769 %} 3770 3771 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 3772 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3773 match(Set dst (SubVI src1 src2)); 3774 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 3775 ins_encode %{ 3776 bool vector256 = false; 3777 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3778 %} 3779 ins_pipe( pipe_slow ); 3780 %} 3781 3782 instruct vsub4I(vecX dst, vecX src) %{ 3783 predicate(n->as_Vector()->length() == 4); 3784 match(Set dst (SubVI dst src)); 3785 format %{ "psubd $dst,$src\t! sub packed4I" %} 3786 ins_encode %{ 3787 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 3788 %} 3789 ins_pipe( pipe_slow ); 3790 %} 3791 3792 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 3793 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3794 match(Set dst (SubVI src1 src2)); 3795 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 3796 ins_encode %{ 3797 bool vector256 = false; 3798 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3799 %} 3800 ins_pipe( pipe_slow ); 3801 %} 3802 3803 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 3804 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3805 match(Set dst (SubVI src (LoadVector mem))); 3806 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 3807 ins_encode %{ 3808 bool vector256 = false; 3809 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3810 %} 3811 ins_pipe( pipe_slow ); 3812 %} 3813 3814 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 3815 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3816 match(Set dst (SubVI src1 src2)); 3817 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 3818 ins_encode %{ 3819 bool vector256 = true; 3820 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3821 %} 3822 ins_pipe( pipe_slow ); 3823 %} 3824 3825 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 3826 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3827 match(Set dst (SubVI src (LoadVector mem))); 3828 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 3829 ins_encode %{ 3830 bool vector256 = true; 3831 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3832 %} 3833 ins_pipe( pipe_slow ); 3834 %} 3835 3836 // Longs vector sub 3837 instruct vsub2L(vecX dst, vecX src) %{ 3838 predicate(n->as_Vector()->length() == 2); 3839 match(Set dst (SubVL dst src)); 3840 format %{ "psubq $dst,$src\t! sub packed2L" %} 3841 ins_encode %{ 3842 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 3843 %} 3844 ins_pipe( pipe_slow ); 3845 %} 3846 3847 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 3848 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3849 match(Set dst (SubVL src1 src2)); 3850 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 3851 ins_encode %{ 3852 bool vector256 = false; 3853 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3854 %} 3855 ins_pipe( pipe_slow ); 3856 %} 3857 3858 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 3859 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3860 match(Set dst (SubVL src (LoadVector mem))); 3861 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 3862 ins_encode %{ 3863 bool vector256 = false; 3864 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3865 %} 3866 ins_pipe( pipe_slow ); 3867 %} 3868 3869 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 3870 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3871 match(Set dst (SubVL src1 src2)); 3872 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 3873 ins_encode %{ 3874 bool vector256 = true; 3875 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3876 %} 3877 ins_pipe( pipe_slow ); 3878 %} 3879 3880 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 3881 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3882 match(Set dst (SubVL src (LoadVector mem))); 3883 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 3884 ins_encode %{ 3885 bool vector256 = true; 3886 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3887 %} 3888 ins_pipe( pipe_slow ); 3889 %} 3890 3891 // Floats vector sub 3892 instruct vsub2F(vecD dst, vecD src) %{ 3893 predicate(n->as_Vector()->length() == 2); 3894 match(Set dst (SubVF dst src)); 3895 format %{ "subps $dst,$src\t! sub packed2F" %} 3896 ins_encode %{ 3897 __ subps($dst$$XMMRegister, $src$$XMMRegister); 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 3903 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3904 match(Set dst (SubVF src1 src2)); 3905 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 3906 ins_encode %{ 3907 bool vector256 = false; 3908 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3909 %} 3910 ins_pipe( pipe_slow ); 3911 %} 3912 3913 instruct vsub4F(vecX dst, vecX src) %{ 3914 predicate(n->as_Vector()->length() == 4); 3915 match(Set dst (SubVF dst src)); 3916 format %{ "subps $dst,$src\t! sub packed4F" %} 3917 ins_encode %{ 3918 __ subps($dst$$XMMRegister, $src$$XMMRegister); 3919 %} 3920 ins_pipe( pipe_slow ); 3921 %} 3922 3923 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 3924 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3925 match(Set dst (SubVF src1 src2)); 3926 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 3927 ins_encode %{ 3928 bool vector256 = false; 3929 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3930 %} 3931 ins_pipe( pipe_slow ); 3932 %} 3933 3934 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 3935 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3936 match(Set dst (SubVF src (LoadVector mem))); 3937 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 3938 ins_encode %{ 3939 bool vector256 = false; 3940 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3941 %} 3942 ins_pipe( pipe_slow ); 3943 %} 3944 3945 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 3946 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3947 match(Set dst (SubVF src1 src2)); 3948 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 3949 ins_encode %{ 3950 bool vector256 = true; 3951 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3952 %} 3953 ins_pipe( pipe_slow ); 3954 %} 3955 3956 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 3957 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3958 match(Set dst (SubVF src (LoadVector mem))); 3959 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 3960 ins_encode %{ 3961 bool vector256 = true; 3962 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3963 %} 3964 ins_pipe( pipe_slow ); 3965 %} 3966 3967 // Doubles vector sub 3968 instruct vsub2D(vecX dst, vecX src) %{ 3969 predicate(n->as_Vector()->length() == 2); 3970 match(Set dst (SubVD dst src)); 3971 format %{ "subpd $dst,$src\t! sub packed2D" %} 3972 ins_encode %{ 3973 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 3974 %} 3975 ins_pipe( pipe_slow ); 3976 %} 3977 3978 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 3979 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3980 match(Set dst (SubVD src1 src2)); 3981 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 3982 ins_encode %{ 3983 bool vector256 = false; 3984 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3985 %} 3986 ins_pipe( pipe_slow ); 3987 %} 3988 3989 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 3990 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3991 match(Set dst (SubVD src (LoadVector mem))); 3992 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 3993 ins_encode %{ 3994 bool vector256 = false; 3995 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3996 %} 3997 ins_pipe( pipe_slow ); 3998 %} 3999 4000 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 4001 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4002 match(Set dst (SubVD src1 src2)); 4003 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 4004 ins_encode %{ 4005 bool vector256 = true; 4006 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4007 %} 4008 ins_pipe( pipe_slow ); 4009 %} 4010 4011 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 4012 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4013 match(Set dst (SubVD src (LoadVector mem))); 4014 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 4015 ins_encode %{ 4016 bool vector256 = true; 4017 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4018 %} 4019 ins_pipe( pipe_slow ); 4020 %} 4021 4022 // --------------------------------- MUL -------------------------------------- 4023 4024 // Shorts/Chars vector mul 4025 instruct vmul2S(vecS dst, vecS src) %{ 4026 predicate(n->as_Vector()->length() == 2); 4027 match(Set dst (MulVS dst src)); 4028 format %{ "pmullw $dst,$src\t! mul packed2S" %} 4029 ins_encode %{ 4030 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4031 %} 4032 ins_pipe( pipe_slow ); 4033 %} 4034 4035 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 4036 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4037 match(Set dst (MulVS src1 src2)); 4038 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 4039 ins_encode %{ 4040 bool vector256 = false; 4041 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4042 %} 4043 ins_pipe( pipe_slow ); 4044 %} 4045 4046 instruct vmul4S(vecD dst, vecD src) %{ 4047 predicate(n->as_Vector()->length() == 4); 4048 match(Set dst (MulVS dst src)); 4049 format %{ "pmullw $dst,$src\t! mul packed4S" %} 4050 ins_encode %{ 4051 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4052 %} 4053 ins_pipe( pipe_slow ); 4054 %} 4055 4056 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 4057 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4058 match(Set dst (MulVS src1 src2)); 4059 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 4060 ins_encode %{ 4061 bool vector256 = false; 4062 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4063 %} 4064 ins_pipe( pipe_slow ); 4065 %} 4066 4067 instruct vmul8S(vecX dst, vecX src) %{ 4068 predicate(n->as_Vector()->length() == 8); 4069 match(Set dst (MulVS dst src)); 4070 format %{ "pmullw $dst,$src\t! mul packed8S" %} 4071 ins_encode %{ 4072 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4073 %} 4074 ins_pipe( pipe_slow ); 4075 %} 4076 4077 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 4078 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4079 match(Set dst (MulVS src1 src2)); 4080 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 4081 ins_encode %{ 4082 bool vector256 = false; 4083 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4084 %} 4085 ins_pipe( pipe_slow ); 4086 %} 4087 4088 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 4089 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4090 match(Set dst (MulVS src (LoadVector mem))); 4091 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 4092 ins_encode %{ 4093 bool vector256 = false; 4094 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4095 %} 4096 ins_pipe( pipe_slow ); 4097 %} 4098 4099 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 4100 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4101 match(Set dst (MulVS src1 src2)); 4102 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 4103 ins_encode %{ 4104 bool vector256 = true; 4105 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4106 %} 4107 ins_pipe( pipe_slow ); 4108 %} 4109 4110 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 4111 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4112 match(Set dst (MulVS src (LoadVector mem))); 4113 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 4114 ins_encode %{ 4115 bool vector256 = true; 4116 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4117 %} 4118 ins_pipe( pipe_slow ); 4119 %} 4120 4121 // Integers vector mul (sse4_1) 4122 instruct vmul2I(vecD dst, vecD src) %{ 4123 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 4124 match(Set dst (MulVI dst src)); 4125 format %{ "pmulld $dst,$src\t! mul packed2I" %} 4126 ins_encode %{ 4127 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 4128 %} 4129 ins_pipe( pipe_slow ); 4130 %} 4131 4132 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 4133 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4134 match(Set dst (MulVI src1 src2)); 4135 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 4136 ins_encode %{ 4137 bool vector256 = false; 4138 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4139 %} 4140 ins_pipe( pipe_slow ); 4141 %} 4142 4143 instruct vmul4I(vecX dst, vecX src) %{ 4144 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 4145 match(Set dst (MulVI dst src)); 4146 format %{ "pmulld $dst,$src\t! mul packed4I" %} 4147 ins_encode %{ 4148 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 4149 %} 4150 ins_pipe( pipe_slow ); 4151 %} 4152 4153 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 4154 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4155 match(Set dst (MulVI src1 src2)); 4156 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 4157 ins_encode %{ 4158 bool vector256 = false; 4159 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4160 %} 4161 ins_pipe( pipe_slow ); 4162 %} 4163 4164 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 4165 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4166 match(Set dst (MulVI src (LoadVector mem))); 4167 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 4168 ins_encode %{ 4169 bool vector256 = false; 4170 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4171 %} 4172 ins_pipe( pipe_slow ); 4173 %} 4174 4175 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 4176 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4177 match(Set dst (MulVI src1 src2)); 4178 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 4179 ins_encode %{ 4180 bool vector256 = true; 4181 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4182 %} 4183 ins_pipe( pipe_slow ); 4184 %} 4185 4186 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 4187 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4188 match(Set dst (MulVI src (LoadVector mem))); 4189 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 4190 ins_encode %{ 4191 bool vector256 = true; 4192 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4193 %} 4194 ins_pipe( pipe_slow ); 4195 %} 4196 4197 // Floats vector mul 4198 instruct vmul2F(vecD dst, vecD src) %{ 4199 predicate(n->as_Vector()->length() == 2); 4200 match(Set dst (MulVF dst src)); 4201 format %{ "mulps $dst,$src\t! mul packed2F" %} 4202 ins_encode %{ 4203 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 4204 %} 4205 ins_pipe( pipe_slow ); 4206 %} 4207 4208 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 4209 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4210 match(Set dst (MulVF src1 src2)); 4211 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 4212 ins_encode %{ 4213 bool vector256 = false; 4214 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4215 %} 4216 ins_pipe( pipe_slow ); 4217 %} 4218 4219 instruct vmul4F(vecX dst, vecX src) %{ 4220 predicate(n->as_Vector()->length() == 4); 4221 match(Set dst (MulVF dst src)); 4222 format %{ "mulps $dst,$src\t! mul packed4F" %} 4223 ins_encode %{ 4224 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 4225 %} 4226 ins_pipe( pipe_slow ); 4227 %} 4228 4229 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 4230 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4231 match(Set dst (MulVF src1 src2)); 4232 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 4233 ins_encode %{ 4234 bool vector256 = false; 4235 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4236 %} 4237 ins_pipe( pipe_slow ); 4238 %} 4239 4240 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 4241 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4242 match(Set dst (MulVF src (LoadVector mem))); 4243 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 4244 ins_encode %{ 4245 bool vector256 = false; 4246 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4247 %} 4248 ins_pipe( pipe_slow ); 4249 %} 4250 4251 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 4252 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4253 match(Set dst (MulVF src1 src2)); 4254 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 4255 ins_encode %{ 4256 bool vector256 = true; 4257 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4258 %} 4259 ins_pipe( pipe_slow ); 4260 %} 4261 4262 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 4263 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4264 match(Set dst (MulVF src (LoadVector mem))); 4265 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 4266 ins_encode %{ 4267 bool vector256 = true; 4268 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4269 %} 4270 ins_pipe( pipe_slow ); 4271 %} 4272 4273 // Doubles vector mul 4274 instruct vmul2D(vecX dst, vecX src) %{ 4275 predicate(n->as_Vector()->length() == 2); 4276 match(Set dst (MulVD dst src)); 4277 format %{ "mulpd $dst,$src\t! mul packed2D" %} 4278 ins_encode %{ 4279 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 4280 %} 4281 ins_pipe( pipe_slow ); 4282 %} 4283 4284 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 4285 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4286 match(Set dst (MulVD src1 src2)); 4287 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 4288 ins_encode %{ 4289 bool vector256 = false; 4290 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4291 %} 4292 ins_pipe( pipe_slow ); 4293 %} 4294 4295 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 4296 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4297 match(Set dst (MulVD src (LoadVector mem))); 4298 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 4299 ins_encode %{ 4300 bool vector256 = false; 4301 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4302 %} 4303 ins_pipe( pipe_slow ); 4304 %} 4305 4306 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 4307 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4308 match(Set dst (MulVD src1 src2)); 4309 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 4310 ins_encode %{ 4311 bool vector256 = true; 4312 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4313 %} 4314 ins_pipe( pipe_slow ); 4315 %} 4316 4317 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 4318 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4319 match(Set dst (MulVD src (LoadVector mem))); 4320 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 4321 ins_encode %{ 4322 bool vector256 = true; 4323 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4324 %} 4325 ins_pipe( pipe_slow ); 4326 %} 4327 4328 // --------------------------------- DIV -------------------------------------- 4329 4330 // Floats vector div 4331 instruct vdiv2F(vecD dst, vecD src) %{ 4332 predicate(n->as_Vector()->length() == 2); 4333 match(Set dst (DivVF dst src)); 4334 format %{ "divps $dst,$src\t! div packed2F" %} 4335 ins_encode %{ 4336 __ divps($dst$$XMMRegister, $src$$XMMRegister); 4337 %} 4338 ins_pipe( pipe_slow ); 4339 %} 4340 4341 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 4342 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4343 match(Set dst (DivVF src1 src2)); 4344 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 4345 ins_encode %{ 4346 bool vector256 = false; 4347 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4348 %} 4349 ins_pipe( pipe_slow ); 4350 %} 4351 4352 instruct vdiv4F(vecX dst, vecX src) %{ 4353 predicate(n->as_Vector()->length() == 4); 4354 match(Set dst (DivVF dst src)); 4355 format %{ "divps $dst,$src\t! div packed4F" %} 4356 ins_encode %{ 4357 __ divps($dst$$XMMRegister, $src$$XMMRegister); 4358 %} 4359 ins_pipe( pipe_slow ); 4360 %} 4361 4362 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 4363 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4364 match(Set dst (DivVF src1 src2)); 4365 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 4366 ins_encode %{ 4367 bool vector256 = false; 4368 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4369 %} 4370 ins_pipe( pipe_slow ); 4371 %} 4372 4373 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 4374 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4375 match(Set dst (DivVF src (LoadVector mem))); 4376 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 4377 ins_encode %{ 4378 bool vector256 = false; 4379 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4380 %} 4381 ins_pipe( pipe_slow ); 4382 %} 4383 4384 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 4385 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4386 match(Set dst (DivVF src1 src2)); 4387 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 4388 ins_encode %{ 4389 bool vector256 = true; 4390 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4391 %} 4392 ins_pipe( pipe_slow ); 4393 %} 4394 4395 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 4396 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4397 match(Set dst (DivVF src (LoadVector mem))); 4398 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 4399 ins_encode %{ 4400 bool vector256 = true; 4401 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4402 %} 4403 ins_pipe( pipe_slow ); 4404 %} 4405 4406 // Doubles vector div 4407 instruct vdiv2D(vecX dst, vecX src) %{ 4408 predicate(n->as_Vector()->length() == 2); 4409 match(Set dst (DivVD dst src)); 4410 format %{ "divpd $dst,$src\t! div packed2D" %} 4411 ins_encode %{ 4412 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 4413 %} 4414 ins_pipe( pipe_slow ); 4415 %} 4416 4417 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 4418 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4419 match(Set dst (DivVD src1 src2)); 4420 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 4421 ins_encode %{ 4422 bool vector256 = false; 4423 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4424 %} 4425 ins_pipe( pipe_slow ); 4426 %} 4427 4428 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 4429 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4430 match(Set dst (DivVD src (LoadVector mem))); 4431 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 4432 ins_encode %{ 4433 bool vector256 = false; 4434 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4435 %} 4436 ins_pipe( pipe_slow ); 4437 %} 4438 4439 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 4440 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4441 match(Set dst (DivVD src1 src2)); 4442 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 4443 ins_encode %{ 4444 bool vector256 = true; 4445 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4446 %} 4447 ins_pipe( pipe_slow ); 4448 %} 4449 4450 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 4451 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4452 match(Set dst (DivVD src (LoadVector mem))); 4453 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 4454 ins_encode %{ 4455 bool vector256 = true; 4456 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4457 %} 4458 ins_pipe( pipe_slow ); 4459 %} 4460 4461 // ------------------------------ Shift --------------------------------------- 4462 4463 // Left and right shift count vectors are the same on x86 4464 // (only lowest bits of xmm reg are used for count). 4465 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 4466 match(Set dst (LShiftCntV cnt)); 4467 match(Set dst (RShiftCntV cnt)); 4468 format %{ "movd $dst,$cnt\t! load shift count" %} 4469 ins_encode %{ 4470 __ movdl($dst$$XMMRegister, $cnt$$Register); 4471 %} 4472 ins_pipe( pipe_slow ); 4473 %} 4474 4475 // ------------------------------ LeftShift ----------------------------------- 4476 4477 // Shorts/Chars vector left shift 4478 instruct vsll2S(vecS dst, vecS shift) %{ 4479 predicate(n->as_Vector()->length() == 2); 4480 match(Set dst (LShiftVS dst shift)); 4481 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 4482 ins_encode %{ 4483 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 4484 %} 4485 ins_pipe( pipe_slow ); 4486 %} 4487 4488 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 4489 predicate(n->as_Vector()->length() == 2); 4490 match(Set dst (LShiftVS dst shift)); 4491 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 4492 ins_encode %{ 4493 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 4494 %} 4495 ins_pipe( pipe_slow ); 4496 %} 4497 4498 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 4499 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4500 match(Set dst (LShiftVS src shift)); 4501 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 4502 ins_encode %{ 4503 bool vector256 = false; 4504 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4505 %} 4506 ins_pipe( pipe_slow ); 4507 %} 4508 4509 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4510 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4511 match(Set dst (LShiftVS src shift)); 4512 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 4513 ins_encode %{ 4514 bool vector256 = false; 4515 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4516 %} 4517 ins_pipe( pipe_slow ); 4518 %} 4519 4520 instruct vsll4S(vecD dst, vecS shift) %{ 4521 predicate(n->as_Vector()->length() == 4); 4522 match(Set dst (LShiftVS dst shift)); 4523 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 4524 ins_encode %{ 4525 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 4526 %} 4527 ins_pipe( pipe_slow ); 4528 %} 4529 4530 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 4531 predicate(n->as_Vector()->length() == 4); 4532 match(Set dst (LShiftVS dst shift)); 4533 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 4534 ins_encode %{ 4535 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 4536 %} 4537 ins_pipe( pipe_slow ); 4538 %} 4539 4540 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 4541 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4542 match(Set dst (LShiftVS src shift)); 4543 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 4544 ins_encode %{ 4545 bool vector256 = false; 4546 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4547 %} 4548 ins_pipe( pipe_slow ); 4549 %} 4550 4551 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4552 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4553 match(Set dst (LShiftVS src shift)); 4554 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 4555 ins_encode %{ 4556 bool vector256 = false; 4557 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4558 %} 4559 ins_pipe( pipe_slow ); 4560 %} 4561 4562 instruct vsll8S(vecX dst, vecS shift) %{ 4563 predicate(n->as_Vector()->length() == 8); 4564 match(Set dst (LShiftVS dst shift)); 4565 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 4566 ins_encode %{ 4567 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 4568 %} 4569 ins_pipe( pipe_slow ); 4570 %} 4571 4572 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 4573 predicate(n->as_Vector()->length() == 8); 4574 match(Set dst (LShiftVS dst shift)); 4575 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 4576 ins_encode %{ 4577 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 4578 %} 4579 ins_pipe( pipe_slow ); 4580 %} 4581 4582 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 4583 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4584 match(Set dst (LShiftVS src shift)); 4585 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 4586 ins_encode %{ 4587 bool vector256 = false; 4588 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4589 %} 4590 ins_pipe( pipe_slow ); 4591 %} 4592 4593 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4594 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4595 match(Set dst (LShiftVS src shift)); 4596 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 4597 ins_encode %{ 4598 bool vector256 = false; 4599 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4600 %} 4601 ins_pipe( pipe_slow ); 4602 %} 4603 4604 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 4605 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4606 match(Set dst (LShiftVS src shift)); 4607 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4608 ins_encode %{ 4609 bool vector256 = true; 4610 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4611 %} 4612 ins_pipe( pipe_slow ); 4613 %} 4614 4615 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4616 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4617 match(Set dst (LShiftVS src shift)); 4618 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4619 ins_encode %{ 4620 bool vector256 = true; 4621 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4622 %} 4623 ins_pipe( pipe_slow ); 4624 %} 4625 4626 // Integers vector left shift 4627 instruct vsll2I(vecD dst, vecS shift) %{ 4628 predicate(n->as_Vector()->length() == 2); 4629 match(Set dst (LShiftVI dst shift)); 4630 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4631 ins_encode %{ 4632 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4633 %} 4634 ins_pipe( pipe_slow ); 4635 %} 4636 4637 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 4638 predicate(n->as_Vector()->length() == 2); 4639 match(Set dst (LShiftVI dst shift)); 4640 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4641 ins_encode %{ 4642 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4643 %} 4644 ins_pipe( pipe_slow ); 4645 %} 4646 4647 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 4648 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4649 match(Set dst (LShiftVI src shift)); 4650 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 4651 ins_encode %{ 4652 bool vector256 = false; 4653 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4654 %} 4655 ins_pipe( pipe_slow ); 4656 %} 4657 4658 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4659 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4660 match(Set dst (LShiftVI src shift)); 4661 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 4662 ins_encode %{ 4663 bool vector256 = false; 4664 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4665 %} 4666 ins_pipe( pipe_slow ); 4667 %} 4668 4669 instruct vsll4I(vecX dst, vecS shift) %{ 4670 predicate(n->as_Vector()->length() == 4); 4671 match(Set dst (LShiftVI dst shift)); 4672 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4673 ins_encode %{ 4674 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4675 %} 4676 ins_pipe( pipe_slow ); 4677 %} 4678 4679 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 4680 predicate(n->as_Vector()->length() == 4); 4681 match(Set dst (LShiftVI dst shift)); 4682 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4683 ins_encode %{ 4684 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4685 %} 4686 ins_pipe( pipe_slow ); 4687 %} 4688 4689 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 4690 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4691 match(Set dst (LShiftVI src shift)); 4692 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 4693 ins_encode %{ 4694 bool vector256 = false; 4695 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4696 %} 4697 ins_pipe( pipe_slow ); 4698 %} 4699 4700 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4701 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4702 match(Set dst (LShiftVI src shift)); 4703 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 4704 ins_encode %{ 4705 bool vector256 = false; 4706 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4707 %} 4708 ins_pipe( pipe_slow ); 4709 %} 4710 4711 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 4712 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4713 match(Set dst (LShiftVI src shift)); 4714 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4715 ins_encode %{ 4716 bool vector256 = true; 4717 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4718 %} 4719 ins_pipe( pipe_slow ); 4720 %} 4721 4722 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4723 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4724 match(Set dst (LShiftVI src shift)); 4725 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4726 ins_encode %{ 4727 bool vector256 = true; 4728 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4729 %} 4730 ins_pipe( pipe_slow ); 4731 %} 4732 4733 // Longs vector left shift 4734 instruct vsll2L(vecX dst, vecS shift) %{ 4735 predicate(n->as_Vector()->length() == 2); 4736 match(Set dst (LShiftVL dst shift)); 4737 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4738 ins_encode %{ 4739 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 4740 %} 4741 ins_pipe( pipe_slow ); 4742 %} 4743 4744 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 4745 predicate(n->as_Vector()->length() == 2); 4746 match(Set dst (LShiftVL dst shift)); 4747 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4748 ins_encode %{ 4749 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 4750 %} 4751 ins_pipe( pipe_slow ); 4752 %} 4753 4754 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 4755 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4756 match(Set dst (LShiftVL src shift)); 4757 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 4758 ins_encode %{ 4759 bool vector256 = false; 4760 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4761 %} 4762 ins_pipe( pipe_slow ); 4763 %} 4764 4765 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4766 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4767 match(Set dst (LShiftVL src shift)); 4768 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 4769 ins_encode %{ 4770 bool vector256 = false; 4771 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4772 %} 4773 ins_pipe( pipe_slow ); 4774 %} 4775 4776 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 4777 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4778 match(Set dst (LShiftVL src shift)); 4779 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4780 ins_encode %{ 4781 bool vector256 = true; 4782 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4783 %} 4784 ins_pipe( pipe_slow ); 4785 %} 4786 4787 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4788 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4789 match(Set dst (LShiftVL src shift)); 4790 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4791 ins_encode %{ 4792 bool vector256 = true; 4793 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4794 %} 4795 ins_pipe( pipe_slow ); 4796 %} 4797 4798 // ----------------------- LogicalRightShift ----------------------------------- 4799 4800 // Shorts vector logical right shift produces incorrect Java result 4801 // for negative data because java code convert short value into int with 4802 // sign extension before a shift. But char vectors are fine since chars are 4803 // unsigned values. 4804 4805 instruct vsrl2S(vecS dst, vecS shift) %{ 4806 predicate(n->as_Vector()->length() == 2); 4807 match(Set dst (URShiftVS dst shift)); 4808 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 4809 ins_encode %{ 4810 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4811 %} 4812 ins_pipe( pipe_slow ); 4813 %} 4814 4815 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 4816 predicate(n->as_Vector()->length() == 2); 4817 match(Set dst (URShiftVS dst shift)); 4818 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 4819 ins_encode %{ 4820 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4821 %} 4822 ins_pipe( pipe_slow ); 4823 %} 4824 4825 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 4826 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4827 match(Set dst (URShiftVS src shift)); 4828 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4829 ins_encode %{ 4830 bool vector256 = false; 4831 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4832 %} 4833 ins_pipe( pipe_slow ); 4834 %} 4835 4836 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4837 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4838 match(Set dst (URShiftVS src shift)); 4839 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4840 ins_encode %{ 4841 bool vector256 = false; 4842 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4843 %} 4844 ins_pipe( pipe_slow ); 4845 %} 4846 4847 instruct vsrl4S(vecD dst, vecS shift) %{ 4848 predicate(n->as_Vector()->length() == 4); 4849 match(Set dst (URShiftVS dst shift)); 4850 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 4851 ins_encode %{ 4852 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4853 %} 4854 ins_pipe( pipe_slow ); 4855 %} 4856 4857 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 4858 predicate(n->as_Vector()->length() == 4); 4859 match(Set dst (URShiftVS dst shift)); 4860 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 4861 ins_encode %{ 4862 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4863 %} 4864 ins_pipe( pipe_slow ); 4865 %} 4866 4867 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 4868 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4869 match(Set dst (URShiftVS src shift)); 4870 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4871 ins_encode %{ 4872 bool vector256 = false; 4873 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4874 %} 4875 ins_pipe( pipe_slow ); 4876 %} 4877 4878 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4879 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4880 match(Set dst (URShiftVS src shift)); 4881 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4882 ins_encode %{ 4883 bool vector256 = false; 4884 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4885 %} 4886 ins_pipe( pipe_slow ); 4887 %} 4888 4889 instruct vsrl8S(vecX dst, vecS shift) %{ 4890 predicate(n->as_Vector()->length() == 8); 4891 match(Set dst (URShiftVS dst shift)); 4892 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4893 ins_encode %{ 4894 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4895 %} 4896 ins_pipe( pipe_slow ); 4897 %} 4898 4899 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 4900 predicate(n->as_Vector()->length() == 8); 4901 match(Set dst (URShiftVS dst shift)); 4902 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4903 ins_encode %{ 4904 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4905 %} 4906 ins_pipe( pipe_slow ); 4907 %} 4908 4909 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 4910 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4911 match(Set dst (URShiftVS src shift)); 4912 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 4913 ins_encode %{ 4914 bool vector256 = false; 4915 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4916 %} 4917 ins_pipe( pipe_slow ); 4918 %} 4919 4920 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4921 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4922 match(Set dst (URShiftVS src shift)); 4923 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 4924 ins_encode %{ 4925 bool vector256 = false; 4926 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4927 %} 4928 ins_pipe( pipe_slow ); 4929 %} 4930 4931 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 4932 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4933 match(Set dst (URShiftVS src shift)); 4934 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4935 ins_encode %{ 4936 bool vector256 = true; 4937 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4938 %} 4939 ins_pipe( pipe_slow ); 4940 %} 4941 4942 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4943 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4944 match(Set dst (URShiftVS src shift)); 4945 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4946 ins_encode %{ 4947 bool vector256 = true; 4948 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4949 %} 4950 ins_pipe( pipe_slow ); 4951 %} 4952 4953 // Integers vector logical right shift 4954 instruct vsrl2I(vecD dst, vecS shift) %{ 4955 predicate(n->as_Vector()->length() == 2); 4956 match(Set dst (URShiftVI dst shift)); 4957 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4958 ins_encode %{ 4959 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4960 %} 4961 ins_pipe( pipe_slow ); 4962 %} 4963 4964 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 4965 predicate(n->as_Vector()->length() == 2); 4966 match(Set dst (URShiftVI dst shift)); 4967 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4968 ins_encode %{ 4969 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4970 %} 4971 ins_pipe( pipe_slow ); 4972 %} 4973 4974 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 4975 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4976 match(Set dst (URShiftVI src shift)); 4977 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4978 ins_encode %{ 4979 bool vector256 = false; 4980 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4981 %} 4982 ins_pipe( pipe_slow ); 4983 %} 4984 4985 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4986 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4987 match(Set dst (URShiftVI src shift)); 4988 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4989 ins_encode %{ 4990 bool vector256 = false; 4991 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4992 %} 4993 ins_pipe( pipe_slow ); 4994 %} 4995 4996 instruct vsrl4I(vecX dst, vecS shift) %{ 4997 predicate(n->as_Vector()->length() == 4); 4998 match(Set dst (URShiftVI dst shift)); 4999 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 5000 ins_encode %{ 5001 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 5002 %} 5003 ins_pipe( pipe_slow ); 5004 %} 5005 5006 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 5007 predicate(n->as_Vector()->length() == 4); 5008 match(Set dst (URShiftVI dst shift)); 5009 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 5010 ins_encode %{ 5011 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 5012 %} 5013 ins_pipe( pipe_slow ); 5014 %} 5015 5016 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 5017 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5018 match(Set dst (URShiftVI src shift)); 5019 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 5020 ins_encode %{ 5021 bool vector256 = false; 5022 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5023 %} 5024 ins_pipe( pipe_slow ); 5025 %} 5026 5027 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 5028 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5029 match(Set dst (URShiftVI src shift)); 5030 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 5031 ins_encode %{ 5032 bool vector256 = false; 5033 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5034 %} 5035 ins_pipe( pipe_slow ); 5036 %} 5037 5038 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 5039 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5040 match(Set dst (URShiftVI src shift)); 5041 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 5042 ins_encode %{ 5043 bool vector256 = true; 5044 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5045 %} 5046 ins_pipe( pipe_slow ); 5047 %} 5048 5049 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 5050 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5051 match(Set dst (URShiftVI src shift)); 5052 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 5053 ins_encode %{ 5054 bool vector256 = true; 5055 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5056 %} 5057 ins_pipe( pipe_slow ); 5058 %} 5059 5060 // Longs vector logical right shift 5061 instruct vsrl2L(vecX dst, vecS shift) %{ 5062 predicate(n->as_Vector()->length() == 2); 5063 match(Set dst (URShiftVL dst shift)); 5064 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 5065 ins_encode %{ 5066 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 5067 %} 5068 ins_pipe( pipe_slow ); 5069 %} 5070 5071 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 5072 predicate(n->as_Vector()->length() == 2); 5073 match(Set dst (URShiftVL dst shift)); 5074 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 5075 ins_encode %{ 5076 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 5077 %} 5078 ins_pipe( pipe_slow ); 5079 %} 5080 5081 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 5082 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5083 match(Set dst (URShiftVL src shift)); 5084 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 5085 ins_encode %{ 5086 bool vector256 = false; 5087 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5088 %} 5089 ins_pipe( pipe_slow ); 5090 %} 5091 5092 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 5093 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5094 match(Set dst (URShiftVL src shift)); 5095 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 5096 ins_encode %{ 5097 bool vector256 = false; 5098 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5099 %} 5100 ins_pipe( pipe_slow ); 5101 %} 5102 5103 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 5104 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5105 match(Set dst (URShiftVL src shift)); 5106 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 5107 ins_encode %{ 5108 bool vector256 = true; 5109 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5110 %} 5111 ins_pipe( pipe_slow ); 5112 %} 5113 5114 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 5115 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5116 match(Set dst (URShiftVL src shift)); 5117 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 5118 ins_encode %{ 5119 bool vector256 = true; 5120 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5121 %} 5122 ins_pipe( pipe_slow ); 5123 %} 5124 5125 // ------------------- ArithmeticRightShift ----------------------------------- 5126 5127 // Shorts/Chars vector arithmetic right shift 5128 instruct vsra2S(vecS dst, vecS shift) %{ 5129 predicate(n->as_Vector()->length() == 2); 5130 match(Set dst (RShiftVS dst shift)); 5131 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 5132 ins_encode %{ 5133 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 5134 %} 5135 ins_pipe( pipe_slow ); 5136 %} 5137 5138 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 5139 predicate(n->as_Vector()->length() == 2); 5140 match(Set dst (RShiftVS dst shift)); 5141 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 5142 ins_encode %{ 5143 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 5144 %} 5145 ins_pipe( pipe_slow ); 5146 %} 5147 5148 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 5149 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5150 match(Set dst (RShiftVS src shift)); 5151 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 5152 ins_encode %{ 5153 bool vector256 = false; 5154 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5155 %} 5156 ins_pipe( pipe_slow ); 5157 %} 5158 5159 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 5160 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5161 match(Set dst (RShiftVS src shift)); 5162 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 5163 ins_encode %{ 5164 bool vector256 = false; 5165 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5166 %} 5167 ins_pipe( pipe_slow ); 5168 %} 5169 5170 instruct vsra4S(vecD dst, vecS shift) %{ 5171 predicate(n->as_Vector()->length() == 4); 5172 match(Set dst (RShiftVS dst shift)); 5173 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 5174 ins_encode %{ 5175 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 5176 %} 5177 ins_pipe( pipe_slow ); 5178 %} 5179 5180 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 5181 predicate(n->as_Vector()->length() == 4); 5182 match(Set dst (RShiftVS dst shift)); 5183 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 5184 ins_encode %{ 5185 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 5186 %} 5187 ins_pipe( pipe_slow ); 5188 %} 5189 5190 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 5191 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5192 match(Set dst (RShiftVS src shift)); 5193 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 5194 ins_encode %{ 5195 bool vector256 = false; 5196 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5197 %} 5198 ins_pipe( pipe_slow ); 5199 %} 5200 5201 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 5202 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5203 match(Set dst (RShiftVS src shift)); 5204 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 5205 ins_encode %{ 5206 bool vector256 = false; 5207 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5208 %} 5209 ins_pipe( pipe_slow ); 5210 %} 5211 5212 instruct vsra8S(vecX dst, vecS shift) %{ 5213 predicate(n->as_Vector()->length() == 8); 5214 match(Set dst (RShiftVS dst shift)); 5215 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 5216 ins_encode %{ 5217 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 5218 %} 5219 ins_pipe( pipe_slow ); 5220 %} 5221 5222 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 5223 predicate(n->as_Vector()->length() == 8); 5224 match(Set dst (RShiftVS dst shift)); 5225 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 5226 ins_encode %{ 5227 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 5228 %} 5229 ins_pipe( pipe_slow ); 5230 %} 5231 5232 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 5233 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5234 match(Set dst (RShiftVS src shift)); 5235 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 5236 ins_encode %{ 5237 bool vector256 = false; 5238 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5239 %} 5240 ins_pipe( pipe_slow ); 5241 %} 5242 5243 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 5244 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5245 match(Set dst (RShiftVS src shift)); 5246 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 5247 ins_encode %{ 5248 bool vector256 = false; 5249 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5250 %} 5251 ins_pipe( pipe_slow ); 5252 %} 5253 5254 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 5255 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5256 match(Set dst (RShiftVS src shift)); 5257 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 5258 ins_encode %{ 5259 bool vector256 = true; 5260 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5261 %} 5262 ins_pipe( pipe_slow ); 5263 %} 5264 5265 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 5266 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5267 match(Set dst (RShiftVS src shift)); 5268 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 5269 ins_encode %{ 5270 bool vector256 = true; 5271 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5272 %} 5273 ins_pipe( pipe_slow ); 5274 %} 5275 5276 // Integers vector arithmetic right shift 5277 instruct vsra2I(vecD dst, vecS shift) %{ 5278 predicate(n->as_Vector()->length() == 2); 5279 match(Set dst (RShiftVI dst shift)); 5280 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 5281 ins_encode %{ 5282 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 5283 %} 5284 ins_pipe( pipe_slow ); 5285 %} 5286 5287 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 5288 predicate(n->as_Vector()->length() == 2); 5289 match(Set dst (RShiftVI dst shift)); 5290 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 5291 ins_encode %{ 5292 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 5293 %} 5294 ins_pipe( pipe_slow ); 5295 %} 5296 5297 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 5298 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5299 match(Set dst (RShiftVI src shift)); 5300 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 5301 ins_encode %{ 5302 bool vector256 = false; 5303 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5304 %} 5305 ins_pipe( pipe_slow ); 5306 %} 5307 5308 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 5309 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5310 match(Set dst (RShiftVI src shift)); 5311 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 5312 ins_encode %{ 5313 bool vector256 = false; 5314 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5315 %} 5316 ins_pipe( pipe_slow ); 5317 %} 5318 5319 instruct vsra4I(vecX dst, vecS shift) %{ 5320 predicate(n->as_Vector()->length() == 4); 5321 match(Set dst (RShiftVI dst shift)); 5322 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 5323 ins_encode %{ 5324 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 5325 %} 5326 ins_pipe( pipe_slow ); 5327 %} 5328 5329 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 5330 predicate(n->as_Vector()->length() == 4); 5331 match(Set dst (RShiftVI dst shift)); 5332 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 5333 ins_encode %{ 5334 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 5335 %} 5336 ins_pipe( pipe_slow ); 5337 %} 5338 5339 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 5340 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5341 match(Set dst (RShiftVI src shift)); 5342 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 5343 ins_encode %{ 5344 bool vector256 = false; 5345 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5346 %} 5347 ins_pipe( pipe_slow ); 5348 %} 5349 5350 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 5351 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5352 match(Set dst (RShiftVI src shift)); 5353 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 5354 ins_encode %{ 5355 bool vector256 = false; 5356 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5357 %} 5358 ins_pipe( pipe_slow ); 5359 %} 5360 5361 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 5362 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5363 match(Set dst (RShiftVI src shift)); 5364 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 5365 ins_encode %{ 5366 bool vector256 = true; 5367 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5368 %} 5369 ins_pipe( pipe_slow ); 5370 %} 5371 5372 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 5373 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5374 match(Set dst (RShiftVI src shift)); 5375 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 5376 ins_encode %{ 5377 bool vector256 = true; 5378 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5379 %} 5380 ins_pipe( pipe_slow ); 5381 %} 5382 5383 // There are no longs vector arithmetic right shift instructions. 5384 5385 5386 // --------------------------------- AND -------------------------------------- 5387 5388 instruct vand4B(vecS dst, vecS src) %{ 5389 predicate(n->as_Vector()->length_in_bytes() == 4); 5390 match(Set dst (AndV dst src)); 5391 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 5392 ins_encode %{ 5393 __ pand($dst$$XMMRegister, $src$$XMMRegister); 5394 %} 5395 ins_pipe( pipe_slow ); 5396 %} 5397 5398 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 5399 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 5400 match(Set dst (AndV src1 src2)); 5401 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 5402 ins_encode %{ 5403 bool vector256 = false; 5404 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5405 %} 5406 ins_pipe( pipe_slow ); 5407 %} 5408 5409 instruct vand8B(vecD dst, vecD src) %{ 5410 predicate(n->as_Vector()->length_in_bytes() == 8); 5411 match(Set dst (AndV dst src)); 5412 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 5413 ins_encode %{ 5414 __ pand($dst$$XMMRegister, $src$$XMMRegister); 5415 %} 5416 ins_pipe( pipe_slow ); 5417 %} 5418 5419 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 5420 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 5421 match(Set dst (AndV src1 src2)); 5422 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 5423 ins_encode %{ 5424 bool vector256 = false; 5425 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5426 %} 5427 ins_pipe( pipe_slow ); 5428 %} 5429 5430 instruct vand16B(vecX dst, vecX src) %{ 5431 predicate(n->as_Vector()->length_in_bytes() == 16); 5432 match(Set dst (AndV dst src)); 5433 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 5434 ins_encode %{ 5435 __ pand($dst$$XMMRegister, $src$$XMMRegister); 5436 %} 5437 ins_pipe( pipe_slow ); 5438 %} 5439 5440 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 5441 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5442 match(Set dst (AndV src1 src2)); 5443 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 5444 ins_encode %{ 5445 bool vector256 = false; 5446 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5447 %} 5448 ins_pipe( pipe_slow ); 5449 %} 5450 5451 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 5452 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5453 match(Set dst (AndV src (LoadVector mem))); 5454 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 5455 ins_encode %{ 5456 bool vector256 = false; 5457 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5458 %} 5459 ins_pipe( pipe_slow ); 5460 %} 5461 5462 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 5463 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5464 match(Set dst (AndV src1 src2)); 5465 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 5466 ins_encode %{ 5467 bool vector256 = true; 5468 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5469 %} 5470 ins_pipe( pipe_slow ); 5471 %} 5472 5473 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 5474 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5475 match(Set dst (AndV src (LoadVector mem))); 5476 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 5477 ins_encode %{ 5478 bool vector256 = true; 5479 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5480 %} 5481 ins_pipe( pipe_slow ); 5482 %} 5483 5484 // --------------------------------- OR --------------------------------------- 5485 5486 instruct vor4B(vecS dst, vecS src) %{ 5487 predicate(n->as_Vector()->length_in_bytes() == 4); 5488 match(Set dst (OrV dst src)); 5489 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 5490 ins_encode %{ 5491 __ por($dst$$XMMRegister, $src$$XMMRegister); 5492 %} 5493 ins_pipe( pipe_slow ); 5494 %} 5495 5496 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 5497 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 5498 match(Set dst (OrV src1 src2)); 5499 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 5500 ins_encode %{ 5501 bool vector256 = false; 5502 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5503 %} 5504 ins_pipe( pipe_slow ); 5505 %} 5506 5507 instruct vor8B(vecD dst, vecD src) %{ 5508 predicate(n->as_Vector()->length_in_bytes() == 8); 5509 match(Set dst (OrV dst src)); 5510 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 5511 ins_encode %{ 5512 __ por($dst$$XMMRegister, $src$$XMMRegister); 5513 %} 5514 ins_pipe( pipe_slow ); 5515 %} 5516 5517 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 5518 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 5519 match(Set dst (OrV src1 src2)); 5520 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} 5521 ins_encode %{ 5522 bool vector256 = false; 5523 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5524 %} 5525 ins_pipe( pipe_slow ); 5526 %} 5527 5528 instruct vor16B(vecX dst, vecX src) %{ 5529 predicate(n->as_Vector()->length_in_bytes() == 16); 5530 match(Set dst (OrV dst src)); 5531 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 5532 ins_encode %{ 5533 __ por($dst$$XMMRegister, $src$$XMMRegister); 5534 %} 5535 ins_pipe( pipe_slow ); 5536 %} 5537 5538 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 5539 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5540 match(Set dst (OrV src1 src2)); 5541 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 5542 ins_encode %{ 5543 bool vector256 = false; 5544 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5545 %} 5546 ins_pipe( pipe_slow ); 5547 %} 5548 5549 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 5550 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5551 match(Set dst (OrV src (LoadVector mem))); 5552 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 5553 ins_encode %{ 5554 bool vector256 = false; 5555 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5556 %} 5557 ins_pipe( pipe_slow ); 5558 %} 5559 5560 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 5561 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5562 match(Set dst (OrV src1 src2)); 5563 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 5564 ins_encode %{ 5565 bool vector256 = true; 5566 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5567 %} 5568 ins_pipe( pipe_slow ); 5569 %} 5570 5571 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 5572 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5573 match(Set dst (OrV src (LoadVector mem))); 5574 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 5575 ins_encode %{ 5576 bool vector256 = true; 5577 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5578 %} 5579 ins_pipe( pipe_slow ); 5580 %} 5581 5582 // --------------------------------- XOR -------------------------------------- 5583 5584 instruct vxor4B(vecS dst, vecS src) %{ 5585 predicate(n->as_Vector()->length_in_bytes() == 4); 5586 match(Set dst (XorV dst src)); 5587 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 5588 ins_encode %{ 5589 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5590 %} 5591 ins_pipe( pipe_slow ); 5592 %} 5593 5594 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 5595 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 5596 match(Set dst (XorV src1 src2)); 5597 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 5598 ins_encode %{ 5599 bool vector256 = false; 5600 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5601 %} 5602 ins_pipe( pipe_slow ); 5603 %} 5604 5605 instruct vxor8B(vecD dst, vecD src) %{ 5606 predicate(n->as_Vector()->length_in_bytes() == 8); 5607 match(Set dst (XorV dst src)); 5608 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 5609 ins_encode %{ 5610 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5611 %} 5612 ins_pipe( pipe_slow ); 5613 %} 5614 5615 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 5616 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 5617 match(Set dst (XorV src1 src2)); 5618 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 5619 ins_encode %{ 5620 bool vector256 = false; 5621 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5622 %} 5623 ins_pipe( pipe_slow ); 5624 %} 5625 5626 instruct vxor16B(vecX dst, vecX src) %{ 5627 predicate(n->as_Vector()->length_in_bytes() == 16); 5628 match(Set dst (XorV dst src)); 5629 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 5630 ins_encode %{ 5631 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5632 %} 5633 ins_pipe( pipe_slow ); 5634 %} 5635 5636 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 5637 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5638 match(Set dst (XorV src1 src2)); 5639 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 5640 ins_encode %{ 5641 bool vector256 = false; 5642 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5643 %} 5644 ins_pipe( pipe_slow ); 5645 %} 5646 5647 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 5648 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5649 match(Set dst (XorV src (LoadVector mem))); 5650 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 5651 ins_encode %{ 5652 bool vector256 = false; 5653 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5654 %} 5655 ins_pipe( pipe_slow ); 5656 %} 5657 5658 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 5659 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5660 match(Set dst (XorV src1 src2)); 5661 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 5662 ins_encode %{ 5663 bool vector256 = true; 5664 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5665 %} 5666 ins_pipe( pipe_slow ); 5667 %} 5668 5669 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 5670 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5671 match(Set dst (XorV src (LoadVector mem))); 5672 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 5673 ins_encode %{ 5674 bool vector256 = true; 5675 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5676 %} 5677 ins_pipe( pipe_slow ); 5678 %} 5679