1 // 2 // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // archtecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX). 68 // Linux ABI: No register preserved across function calls 69 // XMM0-XMM7 might hold parameters 70 // Windows ABI: XMM6-XMM15 preserved across function calls 71 // XMM0-XMM3 might hold parameters 72 73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 81 82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 90 91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 99 100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 108 109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 117 118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 126 127 #ifdef _WIN64 128 129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); 130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 137 138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); 139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 146 147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); 148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 155 156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); 157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 164 165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); 166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 173 174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); 175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 182 183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); 184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 191 192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); 193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 200 201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); 202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 209 210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); 211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 218 219 #else // _WIN64 220 221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 229 230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 238 239 #ifdef _LP64 240 241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 249 250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 258 259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 267 268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 276 277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 285 286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 294 295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 303 304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 312 313 #endif // _LP64 314 315 #endif // _WIN64 316 317 #ifdef _LP64 318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 319 #else 320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 321 #endif // _LP64 322 323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 331 #ifdef _LP64 332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 340 #endif 341 ); 342 343 // flags allocation class should be last. 344 alloc_class chunk2(RFLAGS); 345 346 // Singleton class for condition codes 347 reg_class int_flags(RFLAGS); 348 349 // Class for all float registers 350 reg_class float_reg(XMM0, 351 XMM1, 352 XMM2, 353 XMM3, 354 XMM4, 355 XMM5, 356 XMM6, 357 XMM7 358 #ifdef _LP64 359 ,XMM8, 360 XMM9, 361 XMM10, 362 XMM11, 363 XMM12, 364 XMM13, 365 XMM14, 366 XMM15 367 #endif 368 ); 369 370 // Class for all double registers 371 reg_class double_reg(XMM0, XMM0b, 372 XMM1, XMM1b, 373 XMM2, XMM2b, 374 XMM3, XMM3b, 375 XMM4, XMM4b, 376 XMM5, XMM5b, 377 XMM6, XMM6b, 378 XMM7, XMM7b 379 #ifdef _LP64 380 ,XMM8, XMM8b, 381 XMM9, XMM9b, 382 XMM10, XMM10b, 383 XMM11, XMM11b, 384 XMM12, XMM12b, 385 XMM13, XMM13b, 386 XMM14, XMM14b, 387 XMM15, XMM15b 388 #endif 389 ); 390 391 // Class for all 32bit vector registers 392 reg_class vectors_reg(XMM0, 393 XMM1, 394 XMM2, 395 XMM3, 396 XMM4, 397 XMM5, 398 XMM6, 399 XMM7 400 #ifdef _LP64 401 ,XMM8, 402 XMM9, 403 XMM10, 404 XMM11, 405 XMM12, 406 XMM13, 407 XMM14, 408 XMM15 409 #endif 410 ); 411 412 // Class for all 64bit vector registers 413 reg_class vectord_reg(XMM0, XMM0b, 414 XMM1, XMM1b, 415 XMM2, XMM2b, 416 XMM3, XMM3b, 417 XMM4, XMM4b, 418 XMM5, XMM5b, 419 XMM6, XMM6b, 420 XMM7, XMM7b 421 #ifdef _LP64 422 ,XMM8, XMM8b, 423 XMM9, XMM9b, 424 XMM10, XMM10b, 425 XMM11, XMM11b, 426 XMM12, XMM12b, 427 XMM13, XMM13b, 428 XMM14, XMM14b, 429 XMM15, XMM15b 430 #endif 431 ); 432 433 // Class for all 128bit vector registers 434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d, 435 XMM1, XMM1b, XMM1c, XMM1d, 436 XMM2, XMM2b, XMM2c, XMM2d, 437 XMM3, XMM3b, XMM3c, XMM3d, 438 XMM4, XMM4b, XMM4c, XMM4d, 439 XMM5, XMM5b, XMM5c, XMM5d, 440 XMM6, XMM6b, XMM6c, XMM6d, 441 XMM7, XMM7b, XMM7c, XMM7d 442 #ifdef _LP64 443 ,XMM8, XMM8b, XMM8c, XMM8d, 444 XMM9, XMM9b, XMM9c, XMM9d, 445 XMM10, XMM10b, XMM10c, XMM10d, 446 XMM11, XMM11b, XMM11c, XMM11d, 447 XMM12, XMM12b, XMM12c, XMM12d, 448 XMM13, XMM13b, XMM13c, XMM13d, 449 XMM14, XMM14b, XMM14c, XMM14d, 450 XMM15, XMM15b, XMM15c, XMM15d 451 #endif 452 ); 453 454 // Class for all 256bit vector registers 455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 463 #ifdef _LP64 464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 472 #endif 473 ); 474 475 %} 476 477 478 //----------SOURCE BLOCK------------------------------------------------------- 479 // This is a block of C++ code which provides values, functions, and 480 // definitions necessary in the rest of the architecture description 481 482 source_hpp %{ 483 // Header information of the source block. 484 // Method declarations/definitions which are used outside 485 // the ad-scope can conveniently be defined here. 486 // 487 // To keep related declarations/definitions/uses close together, 488 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 489 490 class NativeJump; 491 492 class CallStubImpl { 493 494 //-------------------------------------------------------------- 495 //---< Used for optimization in Compile::shorten_branches >--- 496 //-------------------------------------------------------------- 497 498 public: 499 // Size of call trampoline stub. 500 static uint size_call_trampoline() { 501 return 0; // no call trampolines on this platform 502 } 503 504 // number of relocations needed by a call trampoline stub 505 static uint reloc_call_trampoline() { 506 return 0; // no call trampolines on this platform 507 } 508 }; 509 510 class HandlerImpl { 511 512 public: 513 514 static int emit_exception_handler(CodeBuffer &cbuf); 515 static int emit_deopt_handler(CodeBuffer& cbuf); 516 517 static uint size_exception_handler() { 518 // NativeCall instruction size is the same as NativeJump. 519 // exception handler starts out as jump and can be patched to 520 // a call be deoptimization. (4932387) 521 // Note that this value is also credited (in output.cpp) to 522 // the size of the code section. 523 return NativeJump::instruction_size; 524 } 525 526 #ifdef _LP64 527 static uint size_deopt_handler() { 528 // three 5 byte instructions 529 return 15; 530 } 531 #else 532 static uint size_deopt_handler() { 533 // NativeCall instruction size is the same as NativeJump. 534 // exception handler starts out as jump and can be patched to 535 // a call be deoptimization. (4932387) 536 // Note that this value is also credited (in output.cpp) to 537 // the size of the code section. 538 return 5 + NativeJump::instruction_size; // pushl(); jmp; 539 } 540 #endif 541 }; 542 543 %} // end source_hpp 544 545 source %{ 546 547 // Emit exception handler code. 548 // Stuff framesize into a register and call a VM stub routine. 549 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 550 551 // Note that the code buffer's insts_mark is always relative to insts. 552 // That's why we must use the macroassembler to generate a handler. 553 MacroAssembler _masm(&cbuf); 554 address base = __ start_a_stub(size_exception_handler()); 555 if (base == NULL) return 0; // CodeBuffer::expand failed 556 int offset = __ offset(); 557 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 558 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 559 __ end_a_stub(); 560 return offset; 561 } 562 563 // Emit deopt handler code. 564 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 565 566 // Note that the code buffer's insts_mark is always relative to insts. 567 // That's why we must use the macroassembler to generate a handler. 568 MacroAssembler _masm(&cbuf); 569 address base = __ start_a_stub(size_deopt_handler()); 570 if (base == NULL) return 0; // CodeBuffer::expand failed 571 int offset = __ offset(); 572 573 #ifdef _LP64 574 address the_pc = (address) __ pc(); 575 Label next; 576 // push a "the_pc" on the stack without destroying any registers 577 // as they all may be live. 578 579 // push address of "next" 580 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 581 __ bind(next); 582 // adjust it so it matches "the_pc" 583 __ subptr(Address(rsp, 0), __ offset() - offset); 584 #else 585 InternalAddress here(__ pc()); 586 __ pushptr(here.addr()); 587 #endif 588 589 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 590 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 591 __ end_a_stub(); 592 return offset; 593 } 594 595 596 //============================================================================= 597 598 // Float masks come from different places depending on platform. 599 #ifdef _LP64 600 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 601 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 602 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 603 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 604 #else 605 static address float_signmask() { return (address)float_signmask_pool; } 606 static address float_signflip() { return (address)float_signflip_pool; } 607 static address double_signmask() { return (address)double_signmask_pool; } 608 static address double_signflip() { return (address)double_signflip_pool; } 609 #endif 610 611 612 const bool Matcher::match_rule_supported(int opcode) { 613 if (!has_match_rule(opcode)) 614 return false; 615 616 switch (opcode) { 617 case Op_PopCountI: 618 case Op_PopCountL: 619 if (!UsePopCountInstruction) 620 return false; 621 break; 622 case Op_MulVI: 623 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 624 return false; 625 break; 626 case Op_AddReductionVL: 627 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 628 return false; 629 case Op_AddReductionVI: 630 if (UseSSE < 3) // requires at least SSE3 631 return false; 632 case Op_MulReductionVI: 633 if (UseSSE < 4) // requires at least SSE4 634 return false; 635 case Op_AddReductionVF: 636 case Op_AddReductionVD: 637 case Op_MulReductionVF: 638 case Op_MulReductionVD: 639 if (UseSSE < 1) // requires at least SSE 640 return false; 641 break; 642 case Op_CompareAndSwapL: 643 #ifdef _LP64 644 case Op_CompareAndSwapP: 645 #endif 646 if (!VM_Version::supports_cx8()) 647 return false; 648 break; 649 } 650 651 return true; // Per default match rules are supported. 652 } 653 654 // Max vector size in bytes. 0 if not supported. 655 const int Matcher::vector_width_in_bytes(BasicType bt) { 656 assert(is_java_primitive(bt), "only primitive type vectors"); 657 if (UseSSE < 2) return 0; 658 // SSE2 supports 128bit vectors for all types. 659 // AVX2 supports 256bit vectors for all types. 660 int size = (UseAVX > 1) ? 32 : 16; 661 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 662 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 663 size = 32; 664 // Use flag to limit vector size. 665 size = MIN2(size,(int)MaxVectorSize); 666 // Minimum 2 values in vector (or 4 for bytes). 667 switch (bt) { 668 case T_DOUBLE: 669 case T_LONG: 670 if (size < 16) return 0; 671 case T_FLOAT: 672 case T_INT: 673 if (size < 8) return 0; 674 case T_BOOLEAN: 675 case T_BYTE: 676 case T_CHAR: 677 case T_SHORT: 678 if (size < 4) return 0; 679 break; 680 default: 681 ShouldNotReachHere(); 682 } 683 return size; 684 } 685 686 // Limits on vector size (number of elements) loaded into vector. 687 const int Matcher::max_vector_size(const BasicType bt) { 688 return vector_width_in_bytes(bt)/type2aelembytes(bt); 689 } 690 const int Matcher::min_vector_size(const BasicType bt) { 691 int max_size = max_vector_size(bt); 692 // Min size which can be loaded into vector is 4 bytes. 693 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 694 return MIN2(size,max_size); 695 } 696 697 // Vector ideal reg corresponding to specidied size in bytes 698 const int Matcher::vector_ideal_reg(int size) { 699 assert(MaxVectorSize >= size, ""); 700 switch(size) { 701 case 4: return Op_VecS; 702 case 8: return Op_VecD; 703 case 16: return Op_VecX; 704 case 32: return Op_VecY; 705 } 706 ShouldNotReachHere(); 707 return 0; 708 } 709 710 // Only lowest bits of xmm reg are used for vector shift count. 711 const int Matcher::vector_shift_count_ideal_reg(int size) { 712 return Op_VecS; 713 } 714 715 // x86 supports misaligned vectors store/load. 716 const bool Matcher::misaligned_vectors_ok() { 717 return !AlignVector; // can be changed by flag 718 } 719 720 // x86 AES instructions are compatible with SunJCE expanded 721 // keys, hence we do not need to pass the original key to stubs 722 const bool Matcher::pass_original_key_for_aes() { 723 return false; 724 } 725 726 // Helper methods for MachSpillCopyNode::implementation(). 727 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 728 int src_hi, int dst_hi, uint ireg, outputStream* st) { 729 // In 64-bit VM size calculation is very complex. Emitting instructions 730 // into scratch buffer is used to get size in 64-bit VM. 731 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 732 assert(ireg == Op_VecS || // 32bit vector 733 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 734 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 735 "no non-adjacent vector moves" ); 736 if (cbuf) { 737 MacroAssembler _masm(cbuf); 738 int offset = __ offset(); 739 switch (ireg) { 740 case Op_VecS: // copy whole register 741 case Op_VecD: 742 case Op_VecX: 743 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 744 break; 745 case Op_VecY: 746 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 747 break; 748 default: 749 ShouldNotReachHere(); 750 } 751 int size = __ offset() - offset; 752 #ifdef ASSERT 753 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 754 assert(!do_size || size == 4, "incorrect size calculattion"); 755 #endif 756 return size; 757 #ifndef PRODUCT 758 } else if (!do_size) { 759 switch (ireg) { 760 case Op_VecS: 761 case Op_VecD: 762 case Op_VecX: 763 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 764 break; 765 case Op_VecY: 766 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 767 break; 768 default: 769 ShouldNotReachHere(); 770 } 771 #endif 772 } 773 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 774 return 4; 775 } 776 777 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 778 int stack_offset, int reg, uint ireg, outputStream* st) { 779 // In 64-bit VM size calculation is very complex. Emitting instructions 780 // into scratch buffer is used to get size in 64-bit VM. 781 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 782 if (cbuf) { 783 MacroAssembler _masm(cbuf); 784 int offset = __ offset(); 785 if (is_load) { 786 switch (ireg) { 787 case Op_VecS: 788 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 789 break; 790 case Op_VecD: 791 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 792 break; 793 case Op_VecX: 794 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 795 break; 796 case Op_VecY: 797 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 798 break; 799 default: 800 ShouldNotReachHere(); 801 } 802 } else { // store 803 switch (ireg) { 804 case Op_VecS: 805 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 806 break; 807 case Op_VecD: 808 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 809 break; 810 case Op_VecX: 811 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 812 break; 813 case Op_VecY: 814 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 815 break; 816 default: 817 ShouldNotReachHere(); 818 } 819 } 820 int size = __ offset() - offset; 821 #ifdef ASSERT 822 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 823 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 824 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 825 #endif 826 return size; 827 #ifndef PRODUCT 828 } else if (!do_size) { 829 if (is_load) { 830 switch (ireg) { 831 case Op_VecS: 832 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 833 break; 834 case Op_VecD: 835 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 836 break; 837 case Op_VecX: 838 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 839 break; 840 case Op_VecY: 841 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 842 break; 843 default: 844 ShouldNotReachHere(); 845 } 846 } else { // store 847 switch (ireg) { 848 case Op_VecS: 849 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 850 break; 851 case Op_VecD: 852 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 853 break; 854 case Op_VecX: 855 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 856 break; 857 case Op_VecY: 858 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 859 break; 860 default: 861 ShouldNotReachHere(); 862 } 863 } 864 #endif 865 } 866 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 867 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 868 return 5+offset_size; 869 } 870 871 static inline jfloat replicate4_imm(int con, int width) { 872 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 873 assert(width == 1 || width == 2, "only byte or short types here"); 874 int bit_width = width * 8; 875 jint val = con; 876 val &= (1 << bit_width) - 1; // mask off sign bits 877 while(bit_width < 32) { 878 val |= (val << bit_width); 879 bit_width <<= 1; 880 } 881 jfloat fval = *((jfloat*) &val); // coerce to float type 882 return fval; 883 } 884 885 static inline jdouble replicate8_imm(int con, int width) { 886 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 887 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 888 int bit_width = width * 8; 889 jlong val = con; 890 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 891 while(bit_width < 64) { 892 val |= (val << bit_width); 893 bit_width <<= 1; 894 } 895 jdouble dval = *((jdouble*) &val); // coerce to double type 896 return dval; 897 } 898 899 #ifndef PRODUCT 900 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 901 st->print("nop \t# %d bytes pad for loops and calls", _count); 902 } 903 #endif 904 905 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 906 MacroAssembler _masm(&cbuf); 907 __ nop(_count); 908 } 909 910 uint MachNopNode::size(PhaseRegAlloc*) const { 911 return _count; 912 } 913 914 #ifndef PRODUCT 915 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 916 st->print("# breakpoint"); 917 } 918 #endif 919 920 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 921 MacroAssembler _masm(&cbuf); 922 __ int3(); 923 } 924 925 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 926 return MachNode::size(ra_); 927 } 928 929 %} 930 931 encode %{ 932 933 enc_class preserve_SP %{ 934 debug_only(int off0 = cbuf.insts_size()); 935 MacroAssembler _masm(&cbuf); 936 // RBP is preserved across all calls, even compiled calls. 937 // Use it to preserve RSP in places where the callee might change the SP. 938 __ movptr(rbp_mh_SP_save, rsp); 939 debug_only(int off1 = cbuf.insts_size()); 940 assert(off1 - off0 == preserve_SP_size(), "correct size prediction"); 941 %} 942 943 enc_class restore_SP %{ 944 MacroAssembler _masm(&cbuf); 945 __ movptr(rsp, rbp_mh_SP_save); 946 %} 947 948 enc_class call_epilog %{ 949 if (VerifyStackAtCalls) { 950 // Check that stack depth is unchanged: find majik cookie on stack 951 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 952 MacroAssembler _masm(&cbuf); 953 Label L; 954 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 955 __ jccb(Assembler::equal, L); 956 // Die if stack mismatch 957 __ int3(); 958 __ bind(L); 959 } 960 %} 961 962 %} 963 964 965 //----------OPERANDS----------------------------------------------------------- 966 // Operand definitions must precede instruction definitions for correct parsing 967 // in the ADLC because operands constitute user defined types which are used in 968 // instruction definitions. 969 970 // Vectors 971 operand vecS() %{ 972 constraint(ALLOC_IN_RC(vectors_reg)); 973 match(VecS); 974 975 format %{ %} 976 interface(REG_INTER); 977 %} 978 979 operand vecD() %{ 980 constraint(ALLOC_IN_RC(vectord_reg)); 981 match(VecD); 982 983 format %{ %} 984 interface(REG_INTER); 985 %} 986 987 operand vecX() %{ 988 constraint(ALLOC_IN_RC(vectorx_reg)); 989 match(VecX); 990 991 format %{ %} 992 interface(REG_INTER); 993 %} 994 995 operand vecY() %{ 996 constraint(ALLOC_IN_RC(vectory_reg)); 997 match(VecY); 998 999 format %{ %} 1000 interface(REG_INTER); 1001 %} 1002 1003 1004 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1005 1006 // ============================================================================ 1007 1008 instruct ShouldNotReachHere() %{ 1009 match(Halt); 1010 format %{ "int3\t# ShouldNotReachHere" %} 1011 ins_encode %{ 1012 __ int3(); 1013 %} 1014 ins_pipe(pipe_slow); 1015 %} 1016 1017 // ============================================================================ 1018 1019 instruct addF_reg(regF dst, regF src) %{ 1020 predicate((UseSSE>=1) && (UseAVX == 0)); 1021 match(Set dst (AddF dst src)); 1022 1023 format %{ "addss $dst, $src" %} 1024 ins_cost(150); 1025 ins_encode %{ 1026 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1027 %} 1028 ins_pipe(pipe_slow); 1029 %} 1030 1031 instruct addF_mem(regF dst, memory src) %{ 1032 predicate((UseSSE>=1) && (UseAVX == 0)); 1033 match(Set dst (AddF dst (LoadF src))); 1034 1035 format %{ "addss $dst, $src" %} 1036 ins_cost(150); 1037 ins_encode %{ 1038 __ addss($dst$$XMMRegister, $src$$Address); 1039 %} 1040 ins_pipe(pipe_slow); 1041 %} 1042 1043 instruct addF_imm(regF dst, immF con) %{ 1044 predicate((UseSSE>=1) && (UseAVX == 0)); 1045 match(Set dst (AddF dst con)); 1046 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1047 ins_cost(150); 1048 ins_encode %{ 1049 __ addss($dst$$XMMRegister, $constantaddress($con)); 1050 %} 1051 ins_pipe(pipe_slow); 1052 %} 1053 1054 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1055 predicate(UseAVX > 0); 1056 match(Set dst (AddF src1 src2)); 1057 1058 format %{ "vaddss $dst, $src1, $src2" %} 1059 ins_cost(150); 1060 ins_encode %{ 1061 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1062 %} 1063 ins_pipe(pipe_slow); 1064 %} 1065 1066 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1067 predicate(UseAVX > 0); 1068 match(Set dst (AddF src1 (LoadF src2))); 1069 1070 format %{ "vaddss $dst, $src1, $src2" %} 1071 ins_cost(150); 1072 ins_encode %{ 1073 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1074 %} 1075 ins_pipe(pipe_slow); 1076 %} 1077 1078 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1079 predicate(UseAVX > 0); 1080 match(Set dst (AddF src con)); 1081 1082 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1083 ins_cost(150); 1084 ins_encode %{ 1085 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1086 %} 1087 ins_pipe(pipe_slow); 1088 %} 1089 1090 instruct addD_reg(regD dst, regD src) %{ 1091 predicate((UseSSE>=2) && (UseAVX == 0)); 1092 match(Set dst (AddD dst src)); 1093 1094 format %{ "addsd $dst, $src" %} 1095 ins_cost(150); 1096 ins_encode %{ 1097 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1098 %} 1099 ins_pipe(pipe_slow); 1100 %} 1101 1102 instruct addD_mem(regD dst, memory src) %{ 1103 predicate((UseSSE>=2) && (UseAVX == 0)); 1104 match(Set dst (AddD dst (LoadD src))); 1105 1106 format %{ "addsd $dst, $src" %} 1107 ins_cost(150); 1108 ins_encode %{ 1109 __ addsd($dst$$XMMRegister, $src$$Address); 1110 %} 1111 ins_pipe(pipe_slow); 1112 %} 1113 1114 instruct addD_imm(regD dst, immD con) %{ 1115 predicate((UseSSE>=2) && (UseAVX == 0)); 1116 match(Set dst (AddD dst con)); 1117 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1118 ins_cost(150); 1119 ins_encode %{ 1120 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1121 %} 1122 ins_pipe(pipe_slow); 1123 %} 1124 1125 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1126 predicate(UseAVX > 0); 1127 match(Set dst (AddD src1 src2)); 1128 1129 format %{ "vaddsd $dst, $src1, $src2" %} 1130 ins_cost(150); 1131 ins_encode %{ 1132 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1133 %} 1134 ins_pipe(pipe_slow); 1135 %} 1136 1137 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1138 predicate(UseAVX > 0); 1139 match(Set dst (AddD src1 (LoadD src2))); 1140 1141 format %{ "vaddsd $dst, $src1, $src2" %} 1142 ins_cost(150); 1143 ins_encode %{ 1144 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1145 %} 1146 ins_pipe(pipe_slow); 1147 %} 1148 1149 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1150 predicate(UseAVX > 0); 1151 match(Set dst (AddD src con)); 1152 1153 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1154 ins_cost(150); 1155 ins_encode %{ 1156 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1157 %} 1158 ins_pipe(pipe_slow); 1159 %} 1160 1161 instruct subF_reg(regF dst, regF src) %{ 1162 predicate((UseSSE>=1) && (UseAVX == 0)); 1163 match(Set dst (SubF dst src)); 1164 1165 format %{ "subss $dst, $src" %} 1166 ins_cost(150); 1167 ins_encode %{ 1168 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1169 %} 1170 ins_pipe(pipe_slow); 1171 %} 1172 1173 instruct subF_mem(regF dst, memory src) %{ 1174 predicate((UseSSE>=1) && (UseAVX == 0)); 1175 match(Set dst (SubF dst (LoadF src))); 1176 1177 format %{ "subss $dst, $src" %} 1178 ins_cost(150); 1179 ins_encode %{ 1180 __ subss($dst$$XMMRegister, $src$$Address); 1181 %} 1182 ins_pipe(pipe_slow); 1183 %} 1184 1185 instruct subF_imm(regF dst, immF con) %{ 1186 predicate((UseSSE>=1) && (UseAVX == 0)); 1187 match(Set dst (SubF dst con)); 1188 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1189 ins_cost(150); 1190 ins_encode %{ 1191 __ subss($dst$$XMMRegister, $constantaddress($con)); 1192 %} 1193 ins_pipe(pipe_slow); 1194 %} 1195 1196 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 1197 predicate(UseAVX > 0); 1198 match(Set dst (SubF src1 src2)); 1199 1200 format %{ "vsubss $dst, $src1, $src2" %} 1201 ins_cost(150); 1202 ins_encode %{ 1203 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1204 %} 1205 ins_pipe(pipe_slow); 1206 %} 1207 1208 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 1209 predicate(UseAVX > 0); 1210 match(Set dst (SubF src1 (LoadF src2))); 1211 1212 format %{ "vsubss $dst, $src1, $src2" %} 1213 ins_cost(150); 1214 ins_encode %{ 1215 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1216 %} 1217 ins_pipe(pipe_slow); 1218 %} 1219 1220 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 1221 predicate(UseAVX > 0); 1222 match(Set dst (SubF src con)); 1223 1224 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1225 ins_cost(150); 1226 ins_encode %{ 1227 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1228 %} 1229 ins_pipe(pipe_slow); 1230 %} 1231 1232 instruct subD_reg(regD dst, regD src) %{ 1233 predicate((UseSSE>=2) && (UseAVX == 0)); 1234 match(Set dst (SubD dst src)); 1235 1236 format %{ "subsd $dst, $src" %} 1237 ins_cost(150); 1238 ins_encode %{ 1239 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 1240 %} 1241 ins_pipe(pipe_slow); 1242 %} 1243 1244 instruct subD_mem(regD dst, memory src) %{ 1245 predicate((UseSSE>=2) && (UseAVX == 0)); 1246 match(Set dst (SubD dst (LoadD src))); 1247 1248 format %{ "subsd $dst, $src" %} 1249 ins_cost(150); 1250 ins_encode %{ 1251 __ subsd($dst$$XMMRegister, $src$$Address); 1252 %} 1253 ins_pipe(pipe_slow); 1254 %} 1255 1256 instruct subD_imm(regD dst, immD con) %{ 1257 predicate((UseSSE>=2) && (UseAVX == 0)); 1258 match(Set dst (SubD dst con)); 1259 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1260 ins_cost(150); 1261 ins_encode %{ 1262 __ subsd($dst$$XMMRegister, $constantaddress($con)); 1263 %} 1264 ins_pipe(pipe_slow); 1265 %} 1266 1267 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 1268 predicate(UseAVX > 0); 1269 match(Set dst (SubD src1 src2)); 1270 1271 format %{ "vsubsd $dst, $src1, $src2" %} 1272 ins_cost(150); 1273 ins_encode %{ 1274 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1275 %} 1276 ins_pipe(pipe_slow); 1277 %} 1278 1279 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 1280 predicate(UseAVX > 0); 1281 match(Set dst (SubD src1 (LoadD src2))); 1282 1283 format %{ "vsubsd $dst, $src1, $src2" %} 1284 ins_cost(150); 1285 ins_encode %{ 1286 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1287 %} 1288 ins_pipe(pipe_slow); 1289 %} 1290 1291 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 1292 predicate(UseAVX > 0); 1293 match(Set dst (SubD src con)); 1294 1295 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1296 ins_cost(150); 1297 ins_encode %{ 1298 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1299 %} 1300 ins_pipe(pipe_slow); 1301 %} 1302 1303 instruct mulF_reg(regF dst, regF src) %{ 1304 predicate((UseSSE>=1) && (UseAVX == 0)); 1305 match(Set dst (MulF dst src)); 1306 1307 format %{ "mulss $dst, $src" %} 1308 ins_cost(150); 1309 ins_encode %{ 1310 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 1311 %} 1312 ins_pipe(pipe_slow); 1313 %} 1314 1315 instruct mulF_mem(regF dst, memory src) %{ 1316 predicate((UseSSE>=1) && (UseAVX == 0)); 1317 match(Set dst (MulF dst (LoadF src))); 1318 1319 format %{ "mulss $dst, $src" %} 1320 ins_cost(150); 1321 ins_encode %{ 1322 __ mulss($dst$$XMMRegister, $src$$Address); 1323 %} 1324 ins_pipe(pipe_slow); 1325 %} 1326 1327 instruct mulF_imm(regF dst, immF con) %{ 1328 predicate((UseSSE>=1) && (UseAVX == 0)); 1329 match(Set dst (MulF dst con)); 1330 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1331 ins_cost(150); 1332 ins_encode %{ 1333 __ mulss($dst$$XMMRegister, $constantaddress($con)); 1334 %} 1335 ins_pipe(pipe_slow); 1336 %} 1337 1338 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 1339 predicate(UseAVX > 0); 1340 match(Set dst (MulF src1 src2)); 1341 1342 format %{ "vmulss $dst, $src1, $src2" %} 1343 ins_cost(150); 1344 ins_encode %{ 1345 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1346 %} 1347 ins_pipe(pipe_slow); 1348 %} 1349 1350 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 1351 predicate(UseAVX > 0); 1352 match(Set dst (MulF src1 (LoadF src2))); 1353 1354 format %{ "vmulss $dst, $src1, $src2" %} 1355 ins_cost(150); 1356 ins_encode %{ 1357 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1358 %} 1359 ins_pipe(pipe_slow); 1360 %} 1361 1362 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 1363 predicate(UseAVX > 0); 1364 match(Set dst (MulF src con)); 1365 1366 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1367 ins_cost(150); 1368 ins_encode %{ 1369 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1370 %} 1371 ins_pipe(pipe_slow); 1372 %} 1373 1374 instruct mulD_reg(regD dst, regD src) %{ 1375 predicate((UseSSE>=2) && (UseAVX == 0)); 1376 match(Set dst (MulD dst src)); 1377 1378 format %{ "mulsd $dst, $src" %} 1379 ins_cost(150); 1380 ins_encode %{ 1381 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 1382 %} 1383 ins_pipe(pipe_slow); 1384 %} 1385 1386 instruct mulD_mem(regD dst, memory src) %{ 1387 predicate((UseSSE>=2) && (UseAVX == 0)); 1388 match(Set dst (MulD dst (LoadD src))); 1389 1390 format %{ "mulsd $dst, $src" %} 1391 ins_cost(150); 1392 ins_encode %{ 1393 __ mulsd($dst$$XMMRegister, $src$$Address); 1394 %} 1395 ins_pipe(pipe_slow); 1396 %} 1397 1398 instruct mulD_imm(regD dst, immD con) %{ 1399 predicate((UseSSE>=2) && (UseAVX == 0)); 1400 match(Set dst (MulD dst con)); 1401 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1402 ins_cost(150); 1403 ins_encode %{ 1404 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 1405 %} 1406 ins_pipe(pipe_slow); 1407 %} 1408 1409 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 1410 predicate(UseAVX > 0); 1411 match(Set dst (MulD src1 src2)); 1412 1413 format %{ "vmulsd $dst, $src1, $src2" %} 1414 ins_cost(150); 1415 ins_encode %{ 1416 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1417 %} 1418 ins_pipe(pipe_slow); 1419 %} 1420 1421 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 1422 predicate(UseAVX > 0); 1423 match(Set dst (MulD src1 (LoadD src2))); 1424 1425 format %{ "vmulsd $dst, $src1, $src2" %} 1426 ins_cost(150); 1427 ins_encode %{ 1428 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1429 %} 1430 ins_pipe(pipe_slow); 1431 %} 1432 1433 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 1434 predicate(UseAVX > 0); 1435 match(Set dst (MulD src con)); 1436 1437 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1438 ins_cost(150); 1439 ins_encode %{ 1440 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1441 %} 1442 ins_pipe(pipe_slow); 1443 %} 1444 1445 instruct divF_reg(regF dst, regF src) %{ 1446 predicate((UseSSE>=1) && (UseAVX == 0)); 1447 match(Set dst (DivF dst src)); 1448 1449 format %{ "divss $dst, $src" %} 1450 ins_cost(150); 1451 ins_encode %{ 1452 __ divss($dst$$XMMRegister, $src$$XMMRegister); 1453 %} 1454 ins_pipe(pipe_slow); 1455 %} 1456 1457 instruct divF_mem(regF dst, memory src) %{ 1458 predicate((UseSSE>=1) && (UseAVX == 0)); 1459 match(Set dst (DivF dst (LoadF src))); 1460 1461 format %{ "divss $dst, $src" %} 1462 ins_cost(150); 1463 ins_encode %{ 1464 __ divss($dst$$XMMRegister, $src$$Address); 1465 %} 1466 ins_pipe(pipe_slow); 1467 %} 1468 1469 instruct divF_imm(regF dst, immF con) %{ 1470 predicate((UseSSE>=1) && (UseAVX == 0)); 1471 match(Set dst (DivF dst con)); 1472 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1473 ins_cost(150); 1474 ins_encode %{ 1475 __ divss($dst$$XMMRegister, $constantaddress($con)); 1476 %} 1477 ins_pipe(pipe_slow); 1478 %} 1479 1480 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 1481 predicate(UseAVX > 0); 1482 match(Set dst (DivF src1 src2)); 1483 1484 format %{ "vdivss $dst, $src1, $src2" %} 1485 ins_cost(150); 1486 ins_encode %{ 1487 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1488 %} 1489 ins_pipe(pipe_slow); 1490 %} 1491 1492 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 1493 predicate(UseAVX > 0); 1494 match(Set dst (DivF src1 (LoadF src2))); 1495 1496 format %{ "vdivss $dst, $src1, $src2" %} 1497 ins_cost(150); 1498 ins_encode %{ 1499 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1500 %} 1501 ins_pipe(pipe_slow); 1502 %} 1503 1504 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 1505 predicate(UseAVX > 0); 1506 match(Set dst (DivF src con)); 1507 1508 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1509 ins_cost(150); 1510 ins_encode %{ 1511 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1512 %} 1513 ins_pipe(pipe_slow); 1514 %} 1515 1516 instruct divD_reg(regD dst, regD src) %{ 1517 predicate((UseSSE>=2) && (UseAVX == 0)); 1518 match(Set dst (DivD dst src)); 1519 1520 format %{ "divsd $dst, $src" %} 1521 ins_cost(150); 1522 ins_encode %{ 1523 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 1524 %} 1525 ins_pipe(pipe_slow); 1526 %} 1527 1528 instruct divD_mem(regD dst, memory src) %{ 1529 predicate((UseSSE>=2) && (UseAVX == 0)); 1530 match(Set dst (DivD dst (LoadD src))); 1531 1532 format %{ "divsd $dst, $src" %} 1533 ins_cost(150); 1534 ins_encode %{ 1535 __ divsd($dst$$XMMRegister, $src$$Address); 1536 %} 1537 ins_pipe(pipe_slow); 1538 %} 1539 1540 instruct divD_imm(regD dst, immD con) %{ 1541 predicate((UseSSE>=2) && (UseAVX == 0)); 1542 match(Set dst (DivD dst con)); 1543 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1544 ins_cost(150); 1545 ins_encode %{ 1546 __ divsd($dst$$XMMRegister, $constantaddress($con)); 1547 %} 1548 ins_pipe(pipe_slow); 1549 %} 1550 1551 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 1552 predicate(UseAVX > 0); 1553 match(Set dst (DivD src1 src2)); 1554 1555 format %{ "vdivsd $dst, $src1, $src2" %} 1556 ins_cost(150); 1557 ins_encode %{ 1558 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1559 %} 1560 ins_pipe(pipe_slow); 1561 %} 1562 1563 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 1564 predicate(UseAVX > 0); 1565 match(Set dst (DivD src1 (LoadD src2))); 1566 1567 format %{ "vdivsd $dst, $src1, $src2" %} 1568 ins_cost(150); 1569 ins_encode %{ 1570 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1571 %} 1572 ins_pipe(pipe_slow); 1573 %} 1574 1575 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 1576 predicate(UseAVX > 0); 1577 match(Set dst (DivD src con)); 1578 1579 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1580 ins_cost(150); 1581 ins_encode %{ 1582 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1583 %} 1584 ins_pipe(pipe_slow); 1585 %} 1586 1587 instruct absF_reg(regF dst) %{ 1588 predicate((UseSSE>=1) && (UseAVX == 0)); 1589 match(Set dst (AbsF dst)); 1590 ins_cost(150); 1591 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1592 ins_encode %{ 1593 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1594 %} 1595 ins_pipe(pipe_slow); 1596 %} 1597 1598 instruct absF_reg_reg(regF dst, regF src) %{ 1599 predicate(UseAVX > 0); 1600 match(Set dst (AbsF src)); 1601 ins_cost(150); 1602 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 1603 ins_encode %{ 1604 bool vector256 = false; 1605 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1606 ExternalAddress(float_signmask()), vector256); 1607 %} 1608 ins_pipe(pipe_slow); 1609 %} 1610 1611 instruct absD_reg(regD dst) %{ 1612 predicate((UseSSE>=2) && (UseAVX == 0)); 1613 match(Set dst (AbsD dst)); 1614 ins_cost(150); 1615 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1616 "# abs double by sign masking" %} 1617 ins_encode %{ 1618 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1619 %} 1620 ins_pipe(pipe_slow); 1621 %} 1622 1623 instruct absD_reg_reg(regD dst, regD src) %{ 1624 predicate(UseAVX > 0); 1625 match(Set dst (AbsD src)); 1626 ins_cost(150); 1627 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1628 "# abs double by sign masking" %} 1629 ins_encode %{ 1630 bool vector256 = false; 1631 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1632 ExternalAddress(double_signmask()), vector256); 1633 %} 1634 ins_pipe(pipe_slow); 1635 %} 1636 1637 instruct negF_reg(regF dst) %{ 1638 predicate((UseSSE>=1) && (UseAVX == 0)); 1639 match(Set dst (NegF dst)); 1640 ins_cost(150); 1641 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1642 ins_encode %{ 1643 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1644 %} 1645 ins_pipe(pipe_slow); 1646 %} 1647 1648 instruct negF_reg_reg(regF dst, regF src) %{ 1649 predicate(UseAVX > 0); 1650 match(Set dst (NegF src)); 1651 ins_cost(150); 1652 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1653 ins_encode %{ 1654 bool vector256 = false; 1655 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1656 ExternalAddress(float_signflip()), vector256); 1657 %} 1658 ins_pipe(pipe_slow); 1659 %} 1660 1661 instruct negD_reg(regD dst) %{ 1662 predicate((UseSSE>=2) && (UseAVX == 0)); 1663 match(Set dst (NegD dst)); 1664 ins_cost(150); 1665 format %{ "xorpd $dst, [0x8000000000000000]\t" 1666 "# neg double by sign flipping" %} 1667 ins_encode %{ 1668 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1669 %} 1670 ins_pipe(pipe_slow); 1671 %} 1672 1673 instruct negD_reg_reg(regD dst, regD src) %{ 1674 predicate(UseAVX > 0); 1675 match(Set dst (NegD src)); 1676 ins_cost(150); 1677 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1678 "# neg double by sign flipping" %} 1679 ins_encode %{ 1680 bool vector256 = false; 1681 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1682 ExternalAddress(double_signflip()), vector256); 1683 %} 1684 ins_pipe(pipe_slow); 1685 %} 1686 1687 instruct sqrtF_reg(regF dst, regF src) %{ 1688 predicate(UseSSE>=1); 1689 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1690 1691 format %{ "sqrtss $dst, $src" %} 1692 ins_cost(150); 1693 ins_encode %{ 1694 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1695 %} 1696 ins_pipe(pipe_slow); 1697 %} 1698 1699 instruct sqrtF_mem(regF dst, memory src) %{ 1700 predicate(UseSSE>=1); 1701 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 1702 1703 format %{ "sqrtss $dst, $src" %} 1704 ins_cost(150); 1705 ins_encode %{ 1706 __ sqrtss($dst$$XMMRegister, $src$$Address); 1707 %} 1708 ins_pipe(pipe_slow); 1709 %} 1710 1711 instruct sqrtF_imm(regF dst, immF con) %{ 1712 predicate(UseSSE>=1); 1713 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 1714 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1715 ins_cost(150); 1716 ins_encode %{ 1717 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 1718 %} 1719 ins_pipe(pipe_slow); 1720 %} 1721 1722 instruct sqrtD_reg(regD dst, regD src) %{ 1723 predicate(UseSSE>=2); 1724 match(Set dst (SqrtD src)); 1725 1726 format %{ "sqrtsd $dst, $src" %} 1727 ins_cost(150); 1728 ins_encode %{ 1729 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 1730 %} 1731 ins_pipe(pipe_slow); 1732 %} 1733 1734 instruct sqrtD_mem(regD dst, memory src) %{ 1735 predicate(UseSSE>=2); 1736 match(Set dst (SqrtD (LoadD src))); 1737 1738 format %{ "sqrtsd $dst, $src" %} 1739 ins_cost(150); 1740 ins_encode %{ 1741 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1742 %} 1743 ins_pipe(pipe_slow); 1744 %} 1745 1746 instruct sqrtD_imm(regD dst, immD con) %{ 1747 predicate(UseSSE>=2); 1748 match(Set dst (SqrtD con)); 1749 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1750 ins_cost(150); 1751 ins_encode %{ 1752 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1753 %} 1754 ins_pipe(pipe_slow); 1755 %} 1756 1757 1758 // ====================VECTOR INSTRUCTIONS===================================== 1759 1760 // Load vectors (4 bytes long) 1761 instruct loadV4(vecS dst, memory mem) %{ 1762 predicate(n->as_LoadVector()->memory_size() == 4); 1763 match(Set dst (LoadVector mem)); 1764 ins_cost(125); 1765 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1766 ins_encode %{ 1767 __ movdl($dst$$XMMRegister, $mem$$Address); 1768 %} 1769 ins_pipe( pipe_slow ); 1770 %} 1771 1772 // Load vectors (8 bytes long) 1773 instruct loadV8(vecD dst, memory mem) %{ 1774 predicate(n->as_LoadVector()->memory_size() == 8); 1775 match(Set dst (LoadVector mem)); 1776 ins_cost(125); 1777 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 1778 ins_encode %{ 1779 __ movq($dst$$XMMRegister, $mem$$Address); 1780 %} 1781 ins_pipe( pipe_slow ); 1782 %} 1783 1784 // Load vectors (16 bytes long) 1785 instruct loadV16(vecX dst, memory mem) %{ 1786 predicate(n->as_LoadVector()->memory_size() == 16); 1787 match(Set dst (LoadVector mem)); 1788 ins_cost(125); 1789 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 1790 ins_encode %{ 1791 __ movdqu($dst$$XMMRegister, $mem$$Address); 1792 %} 1793 ins_pipe( pipe_slow ); 1794 %} 1795 1796 // Load vectors (32 bytes long) 1797 instruct loadV32(vecY dst, memory mem) %{ 1798 predicate(n->as_LoadVector()->memory_size() == 32); 1799 match(Set dst (LoadVector mem)); 1800 ins_cost(125); 1801 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 1802 ins_encode %{ 1803 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 1804 %} 1805 ins_pipe( pipe_slow ); 1806 %} 1807 1808 // Store vectors 1809 instruct storeV4(memory mem, vecS src) %{ 1810 predicate(n->as_StoreVector()->memory_size() == 4); 1811 match(Set mem (StoreVector mem src)); 1812 ins_cost(145); 1813 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 1814 ins_encode %{ 1815 __ movdl($mem$$Address, $src$$XMMRegister); 1816 %} 1817 ins_pipe( pipe_slow ); 1818 %} 1819 1820 instruct storeV8(memory mem, vecD src) %{ 1821 predicate(n->as_StoreVector()->memory_size() == 8); 1822 match(Set mem (StoreVector mem src)); 1823 ins_cost(145); 1824 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1825 ins_encode %{ 1826 __ movq($mem$$Address, $src$$XMMRegister); 1827 %} 1828 ins_pipe( pipe_slow ); 1829 %} 1830 1831 instruct storeV16(memory mem, vecX src) %{ 1832 predicate(n->as_StoreVector()->memory_size() == 16); 1833 match(Set mem (StoreVector mem src)); 1834 ins_cost(145); 1835 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 1836 ins_encode %{ 1837 __ movdqu($mem$$Address, $src$$XMMRegister); 1838 %} 1839 ins_pipe( pipe_slow ); 1840 %} 1841 1842 instruct storeV32(memory mem, vecY src) %{ 1843 predicate(n->as_StoreVector()->memory_size() == 32); 1844 match(Set mem (StoreVector mem src)); 1845 ins_cost(145); 1846 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1847 ins_encode %{ 1848 __ vmovdqu($mem$$Address, $src$$XMMRegister); 1849 %} 1850 ins_pipe( pipe_slow ); 1851 %} 1852 1853 // Replicate byte scalar to be vector 1854 instruct Repl4B(vecS dst, rRegI src) %{ 1855 predicate(n->as_Vector()->length() == 4); 1856 match(Set dst (ReplicateB src)); 1857 format %{ "movd $dst,$src\n\t" 1858 "punpcklbw $dst,$dst\n\t" 1859 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 1860 ins_encode %{ 1861 __ movdl($dst$$XMMRegister, $src$$Register); 1862 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1863 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1864 %} 1865 ins_pipe( pipe_slow ); 1866 %} 1867 1868 instruct Repl8B(vecD dst, rRegI src) %{ 1869 predicate(n->as_Vector()->length() == 8); 1870 match(Set dst (ReplicateB src)); 1871 format %{ "movd $dst,$src\n\t" 1872 "punpcklbw $dst,$dst\n\t" 1873 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1874 ins_encode %{ 1875 __ movdl($dst$$XMMRegister, $src$$Register); 1876 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1877 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1878 %} 1879 ins_pipe( pipe_slow ); 1880 %} 1881 1882 instruct Repl16B(vecX dst, rRegI src) %{ 1883 predicate(n->as_Vector()->length() == 16); 1884 match(Set dst (ReplicateB src)); 1885 format %{ "movd $dst,$src\n\t" 1886 "punpcklbw $dst,$dst\n\t" 1887 "pshuflw $dst,$dst,0x00\n\t" 1888 "punpcklqdq $dst,$dst\t! replicate16B" %} 1889 ins_encode %{ 1890 __ movdl($dst$$XMMRegister, $src$$Register); 1891 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1892 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1893 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1894 %} 1895 ins_pipe( pipe_slow ); 1896 %} 1897 1898 instruct Repl32B(vecY dst, rRegI src) %{ 1899 predicate(n->as_Vector()->length() == 32); 1900 match(Set dst (ReplicateB src)); 1901 format %{ "movd $dst,$src\n\t" 1902 "punpcklbw $dst,$dst\n\t" 1903 "pshuflw $dst,$dst,0x00\n\t" 1904 "punpcklqdq $dst,$dst\n\t" 1905 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 1906 ins_encode %{ 1907 __ movdl($dst$$XMMRegister, $src$$Register); 1908 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1909 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1910 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1911 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1912 %} 1913 ins_pipe( pipe_slow ); 1914 %} 1915 1916 // Replicate byte scalar immediate to be vector by loading from const table. 1917 instruct Repl4B_imm(vecS dst, immI con) %{ 1918 predicate(n->as_Vector()->length() == 4); 1919 match(Set dst (ReplicateB con)); 1920 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1921 ins_encode %{ 1922 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1923 %} 1924 ins_pipe( pipe_slow ); 1925 %} 1926 1927 instruct Repl8B_imm(vecD dst, immI con) %{ 1928 predicate(n->as_Vector()->length() == 8); 1929 match(Set dst (ReplicateB con)); 1930 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 1931 ins_encode %{ 1932 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1933 %} 1934 ins_pipe( pipe_slow ); 1935 %} 1936 1937 instruct Repl16B_imm(vecX dst, immI con) %{ 1938 predicate(n->as_Vector()->length() == 16); 1939 match(Set dst (ReplicateB con)); 1940 format %{ "movq $dst,[$constantaddress]\n\t" 1941 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 1942 ins_encode %{ 1943 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1944 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1945 %} 1946 ins_pipe( pipe_slow ); 1947 %} 1948 1949 instruct Repl32B_imm(vecY dst, immI con) %{ 1950 predicate(n->as_Vector()->length() == 32); 1951 match(Set dst (ReplicateB con)); 1952 format %{ "movq $dst,[$constantaddress]\n\t" 1953 "punpcklqdq $dst,$dst\n\t" 1954 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} 1955 ins_encode %{ 1956 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1957 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1958 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1959 %} 1960 ins_pipe( pipe_slow ); 1961 %} 1962 1963 // Replicate byte scalar zero to be vector 1964 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 1965 predicate(n->as_Vector()->length() == 4); 1966 match(Set dst (ReplicateB zero)); 1967 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 1968 ins_encode %{ 1969 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1970 %} 1971 ins_pipe( fpu_reg_reg ); 1972 %} 1973 1974 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 1975 predicate(n->as_Vector()->length() == 8); 1976 match(Set dst (ReplicateB zero)); 1977 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 1978 ins_encode %{ 1979 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1980 %} 1981 ins_pipe( fpu_reg_reg ); 1982 %} 1983 1984 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1985 predicate(n->as_Vector()->length() == 16); 1986 match(Set dst (ReplicateB zero)); 1987 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 1988 ins_encode %{ 1989 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1990 %} 1991 ins_pipe( fpu_reg_reg ); 1992 %} 1993 1994 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1995 predicate(n->as_Vector()->length() == 32); 1996 match(Set dst (ReplicateB zero)); 1997 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 1998 ins_encode %{ 1999 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2000 bool vector256 = true; 2001 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2002 %} 2003 ins_pipe( fpu_reg_reg ); 2004 %} 2005 2006 // Replicate char/short (2 byte) scalar to be vector 2007 instruct Repl2S(vecS dst, rRegI src) %{ 2008 predicate(n->as_Vector()->length() == 2); 2009 match(Set dst (ReplicateS src)); 2010 format %{ "movd $dst,$src\n\t" 2011 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 2012 ins_encode %{ 2013 __ movdl($dst$$XMMRegister, $src$$Register); 2014 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2015 %} 2016 ins_pipe( fpu_reg_reg ); 2017 %} 2018 2019 instruct Repl4S(vecD dst, rRegI src) %{ 2020 predicate(n->as_Vector()->length() == 4); 2021 match(Set dst (ReplicateS src)); 2022 format %{ "movd $dst,$src\n\t" 2023 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 2024 ins_encode %{ 2025 __ movdl($dst$$XMMRegister, $src$$Register); 2026 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2027 %} 2028 ins_pipe( fpu_reg_reg ); 2029 %} 2030 2031 instruct Repl8S(vecX dst, rRegI src) %{ 2032 predicate(n->as_Vector()->length() == 8); 2033 match(Set dst (ReplicateS src)); 2034 format %{ "movd $dst,$src\n\t" 2035 "pshuflw $dst,$dst,0x00\n\t" 2036 "punpcklqdq $dst,$dst\t! replicate8S" %} 2037 ins_encode %{ 2038 __ movdl($dst$$XMMRegister, $src$$Register); 2039 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2040 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2041 %} 2042 ins_pipe( pipe_slow ); 2043 %} 2044 2045 instruct Repl16S(vecY dst, rRegI src) %{ 2046 predicate(n->as_Vector()->length() == 16); 2047 match(Set dst (ReplicateS src)); 2048 format %{ "movd $dst,$src\n\t" 2049 "pshuflw $dst,$dst,0x00\n\t" 2050 "punpcklqdq $dst,$dst\n\t" 2051 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 2052 ins_encode %{ 2053 __ movdl($dst$$XMMRegister, $src$$Register); 2054 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2055 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2056 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2057 %} 2058 ins_pipe( pipe_slow ); 2059 %} 2060 2061 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 2062 instruct Repl2S_imm(vecS dst, immI con) %{ 2063 predicate(n->as_Vector()->length() == 2); 2064 match(Set dst (ReplicateS con)); 2065 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 2066 ins_encode %{ 2067 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 2068 %} 2069 ins_pipe( fpu_reg_reg ); 2070 %} 2071 2072 instruct Repl4S_imm(vecD dst, immI con) %{ 2073 predicate(n->as_Vector()->length() == 4); 2074 match(Set dst (ReplicateS con)); 2075 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 2076 ins_encode %{ 2077 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2078 %} 2079 ins_pipe( fpu_reg_reg ); 2080 %} 2081 2082 instruct Repl8S_imm(vecX dst, immI con) %{ 2083 predicate(n->as_Vector()->length() == 8); 2084 match(Set dst (ReplicateS con)); 2085 format %{ "movq $dst,[$constantaddress]\n\t" 2086 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 2087 ins_encode %{ 2088 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2089 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2090 %} 2091 ins_pipe( pipe_slow ); 2092 %} 2093 2094 instruct Repl16S_imm(vecY dst, immI con) %{ 2095 predicate(n->as_Vector()->length() == 16); 2096 match(Set dst (ReplicateS con)); 2097 format %{ "movq $dst,[$constantaddress]\n\t" 2098 "punpcklqdq $dst,$dst\n\t" 2099 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} 2100 ins_encode %{ 2101 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2102 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2103 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2104 %} 2105 ins_pipe( pipe_slow ); 2106 %} 2107 2108 // Replicate char/short (2 byte) scalar zero to be vector 2109 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 2110 predicate(n->as_Vector()->length() == 2); 2111 match(Set dst (ReplicateS zero)); 2112 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 2113 ins_encode %{ 2114 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2115 %} 2116 ins_pipe( fpu_reg_reg ); 2117 %} 2118 2119 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 2120 predicate(n->as_Vector()->length() == 4); 2121 match(Set dst (ReplicateS zero)); 2122 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 2123 ins_encode %{ 2124 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2125 %} 2126 ins_pipe( fpu_reg_reg ); 2127 %} 2128 2129 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 2130 predicate(n->as_Vector()->length() == 8); 2131 match(Set dst (ReplicateS zero)); 2132 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 2133 ins_encode %{ 2134 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2135 %} 2136 ins_pipe( fpu_reg_reg ); 2137 %} 2138 2139 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 2140 predicate(n->as_Vector()->length() == 16); 2141 match(Set dst (ReplicateS zero)); 2142 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 2143 ins_encode %{ 2144 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2145 bool vector256 = true; 2146 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2147 %} 2148 ins_pipe( fpu_reg_reg ); 2149 %} 2150 2151 // Replicate integer (4 byte) scalar to be vector 2152 instruct Repl2I(vecD dst, rRegI src) %{ 2153 predicate(n->as_Vector()->length() == 2); 2154 match(Set dst (ReplicateI src)); 2155 format %{ "movd $dst,$src\n\t" 2156 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2157 ins_encode %{ 2158 __ movdl($dst$$XMMRegister, $src$$Register); 2159 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2160 %} 2161 ins_pipe( fpu_reg_reg ); 2162 %} 2163 2164 instruct Repl4I(vecX dst, rRegI src) %{ 2165 predicate(n->as_Vector()->length() == 4); 2166 match(Set dst (ReplicateI src)); 2167 format %{ "movd $dst,$src\n\t" 2168 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2169 ins_encode %{ 2170 __ movdl($dst$$XMMRegister, $src$$Register); 2171 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2172 %} 2173 ins_pipe( pipe_slow ); 2174 %} 2175 2176 instruct Repl8I(vecY dst, rRegI src) %{ 2177 predicate(n->as_Vector()->length() == 8); 2178 match(Set dst (ReplicateI src)); 2179 format %{ "movd $dst,$src\n\t" 2180 "pshufd $dst,$dst,0x00\n\t" 2181 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2182 ins_encode %{ 2183 __ movdl($dst$$XMMRegister, $src$$Register); 2184 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2185 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2186 %} 2187 ins_pipe( pipe_slow ); 2188 %} 2189 2190 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2191 instruct Repl2I_imm(vecD dst, immI con) %{ 2192 predicate(n->as_Vector()->length() == 2); 2193 match(Set dst (ReplicateI con)); 2194 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 2195 ins_encode %{ 2196 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2197 %} 2198 ins_pipe( fpu_reg_reg ); 2199 %} 2200 2201 instruct Repl4I_imm(vecX dst, immI con) %{ 2202 predicate(n->as_Vector()->length() == 4); 2203 match(Set dst (ReplicateI con)); 2204 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2205 "punpcklqdq $dst,$dst" %} 2206 ins_encode %{ 2207 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2208 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2209 %} 2210 ins_pipe( pipe_slow ); 2211 %} 2212 2213 instruct Repl8I_imm(vecY dst, immI con) %{ 2214 predicate(n->as_Vector()->length() == 8); 2215 match(Set dst (ReplicateI con)); 2216 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2217 "punpcklqdq $dst,$dst\n\t" 2218 "vinserti128h $dst,$dst,$dst" %} 2219 ins_encode %{ 2220 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2221 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2222 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2223 %} 2224 ins_pipe( pipe_slow ); 2225 %} 2226 2227 // Integer could be loaded into xmm register directly from memory. 2228 instruct Repl2I_mem(vecD dst, memory mem) %{ 2229 predicate(n->as_Vector()->length() == 2); 2230 match(Set dst (ReplicateI (LoadI mem))); 2231 format %{ "movd $dst,$mem\n\t" 2232 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2233 ins_encode %{ 2234 __ movdl($dst$$XMMRegister, $mem$$Address); 2235 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2236 %} 2237 ins_pipe( fpu_reg_reg ); 2238 %} 2239 2240 instruct Repl4I_mem(vecX dst, memory mem) %{ 2241 predicate(n->as_Vector()->length() == 4); 2242 match(Set dst (ReplicateI (LoadI mem))); 2243 format %{ "movd $dst,$mem\n\t" 2244 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2245 ins_encode %{ 2246 __ movdl($dst$$XMMRegister, $mem$$Address); 2247 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2248 %} 2249 ins_pipe( pipe_slow ); 2250 %} 2251 2252 instruct Repl8I_mem(vecY dst, memory mem) %{ 2253 predicate(n->as_Vector()->length() == 8); 2254 match(Set dst (ReplicateI (LoadI mem))); 2255 format %{ "movd $dst,$mem\n\t" 2256 "pshufd $dst,$dst,0x00\n\t" 2257 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2258 ins_encode %{ 2259 __ movdl($dst$$XMMRegister, $mem$$Address); 2260 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2261 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2262 %} 2263 ins_pipe( pipe_slow ); 2264 %} 2265 2266 // Replicate integer (4 byte) scalar zero to be vector 2267 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 2268 predicate(n->as_Vector()->length() == 2); 2269 match(Set dst (ReplicateI zero)); 2270 format %{ "pxor $dst,$dst\t! replicate2I" %} 2271 ins_encode %{ 2272 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2273 %} 2274 ins_pipe( fpu_reg_reg ); 2275 %} 2276 2277 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 2278 predicate(n->as_Vector()->length() == 4); 2279 match(Set dst (ReplicateI zero)); 2280 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 2281 ins_encode %{ 2282 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2283 %} 2284 ins_pipe( fpu_reg_reg ); 2285 %} 2286 2287 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 2288 predicate(n->as_Vector()->length() == 8); 2289 match(Set dst (ReplicateI zero)); 2290 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 2291 ins_encode %{ 2292 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2293 bool vector256 = true; 2294 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2295 %} 2296 ins_pipe( fpu_reg_reg ); 2297 %} 2298 2299 // Replicate long (8 byte) scalar to be vector 2300 #ifdef _LP64 2301 instruct Repl2L(vecX dst, rRegL src) %{ 2302 predicate(n->as_Vector()->length() == 2); 2303 match(Set dst (ReplicateL src)); 2304 format %{ "movdq $dst,$src\n\t" 2305 "punpcklqdq $dst,$dst\t! replicate2L" %} 2306 ins_encode %{ 2307 __ movdq($dst$$XMMRegister, $src$$Register); 2308 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2309 %} 2310 ins_pipe( pipe_slow ); 2311 %} 2312 2313 instruct Repl4L(vecY dst, rRegL src) %{ 2314 predicate(n->as_Vector()->length() == 4); 2315 match(Set dst (ReplicateL src)); 2316 format %{ "movdq $dst,$src\n\t" 2317 "punpcklqdq $dst,$dst\n\t" 2318 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2319 ins_encode %{ 2320 __ movdq($dst$$XMMRegister, $src$$Register); 2321 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2322 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2323 %} 2324 ins_pipe( pipe_slow ); 2325 %} 2326 #else // _LP64 2327 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 2328 predicate(n->as_Vector()->length() == 2); 2329 match(Set dst (ReplicateL src)); 2330 effect(TEMP dst, USE src, TEMP tmp); 2331 format %{ "movdl $dst,$src.lo\n\t" 2332 "movdl $tmp,$src.hi\n\t" 2333 "punpckldq $dst,$tmp\n\t" 2334 "punpcklqdq $dst,$dst\t! replicate2L"%} 2335 ins_encode %{ 2336 __ movdl($dst$$XMMRegister, $src$$Register); 2337 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2338 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2339 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2340 %} 2341 ins_pipe( pipe_slow ); 2342 %} 2343 2344 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2345 predicate(n->as_Vector()->length() == 4); 2346 match(Set dst (ReplicateL src)); 2347 effect(TEMP dst, USE src, TEMP tmp); 2348 format %{ "movdl $dst,$src.lo\n\t" 2349 "movdl $tmp,$src.hi\n\t" 2350 "punpckldq $dst,$tmp\n\t" 2351 "punpcklqdq $dst,$dst\n\t" 2352 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2353 ins_encode %{ 2354 __ movdl($dst$$XMMRegister, $src$$Register); 2355 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2356 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2357 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2358 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2359 %} 2360 ins_pipe( pipe_slow ); 2361 %} 2362 #endif // _LP64 2363 2364 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2365 instruct Repl2L_imm(vecX dst, immL con) %{ 2366 predicate(n->as_Vector()->length() == 2); 2367 match(Set dst (ReplicateL con)); 2368 format %{ "movq $dst,[$constantaddress]\n\t" 2369 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 2370 ins_encode %{ 2371 __ movq($dst$$XMMRegister, $constantaddress($con)); 2372 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2373 %} 2374 ins_pipe( pipe_slow ); 2375 %} 2376 2377 instruct Repl4L_imm(vecY dst, immL con) %{ 2378 predicate(n->as_Vector()->length() == 4); 2379 match(Set dst (ReplicateL con)); 2380 format %{ "movq $dst,[$constantaddress]\n\t" 2381 "punpcklqdq $dst,$dst\n\t" 2382 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 2383 ins_encode %{ 2384 __ movq($dst$$XMMRegister, $constantaddress($con)); 2385 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2386 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2387 %} 2388 ins_pipe( pipe_slow ); 2389 %} 2390 2391 // Long could be loaded into xmm register directly from memory. 2392 instruct Repl2L_mem(vecX dst, memory mem) %{ 2393 predicate(n->as_Vector()->length() == 2); 2394 match(Set dst (ReplicateL (LoadL mem))); 2395 format %{ "movq $dst,$mem\n\t" 2396 "punpcklqdq $dst,$dst\t! replicate2L" %} 2397 ins_encode %{ 2398 __ movq($dst$$XMMRegister, $mem$$Address); 2399 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2400 %} 2401 ins_pipe( pipe_slow ); 2402 %} 2403 2404 instruct Repl4L_mem(vecY dst, memory mem) %{ 2405 predicate(n->as_Vector()->length() == 4); 2406 match(Set dst (ReplicateL (LoadL mem))); 2407 format %{ "movq $dst,$mem\n\t" 2408 "punpcklqdq $dst,$dst\n\t" 2409 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2410 ins_encode %{ 2411 __ movq($dst$$XMMRegister, $mem$$Address); 2412 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2413 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2414 %} 2415 ins_pipe( pipe_slow ); 2416 %} 2417 2418 // Replicate long (8 byte) scalar zero to be vector 2419 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 2420 predicate(n->as_Vector()->length() == 2); 2421 match(Set dst (ReplicateL zero)); 2422 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 2423 ins_encode %{ 2424 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2425 %} 2426 ins_pipe( fpu_reg_reg ); 2427 %} 2428 2429 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 2430 predicate(n->as_Vector()->length() == 4); 2431 match(Set dst (ReplicateL zero)); 2432 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 2433 ins_encode %{ 2434 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2435 bool vector256 = true; 2436 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2437 %} 2438 ins_pipe( fpu_reg_reg ); 2439 %} 2440 2441 // Replicate float (4 byte) scalar to be vector 2442 instruct Repl2F(vecD dst, regF src) %{ 2443 predicate(n->as_Vector()->length() == 2); 2444 match(Set dst (ReplicateF src)); 2445 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 2446 ins_encode %{ 2447 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2448 %} 2449 ins_pipe( fpu_reg_reg ); 2450 %} 2451 2452 instruct Repl4F(vecX dst, regF src) %{ 2453 predicate(n->as_Vector()->length() == 4); 2454 match(Set dst (ReplicateF src)); 2455 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 2456 ins_encode %{ 2457 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2458 %} 2459 ins_pipe( pipe_slow ); 2460 %} 2461 2462 instruct Repl8F(vecY dst, regF src) %{ 2463 predicate(n->as_Vector()->length() == 8); 2464 match(Set dst (ReplicateF src)); 2465 format %{ "pshufd $dst,$src,0x00\n\t" 2466 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 2467 ins_encode %{ 2468 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2469 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2470 %} 2471 ins_pipe( pipe_slow ); 2472 %} 2473 2474 // Replicate float (4 byte) scalar zero to be vector 2475 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 2476 predicate(n->as_Vector()->length() == 2); 2477 match(Set dst (ReplicateF zero)); 2478 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 2479 ins_encode %{ 2480 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2481 %} 2482 ins_pipe( fpu_reg_reg ); 2483 %} 2484 2485 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 2486 predicate(n->as_Vector()->length() == 4); 2487 match(Set dst (ReplicateF zero)); 2488 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 2489 ins_encode %{ 2490 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2491 %} 2492 ins_pipe( fpu_reg_reg ); 2493 %} 2494 2495 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 2496 predicate(n->as_Vector()->length() == 8); 2497 match(Set dst (ReplicateF zero)); 2498 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 2499 ins_encode %{ 2500 bool vector256 = true; 2501 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2502 %} 2503 ins_pipe( fpu_reg_reg ); 2504 %} 2505 2506 // Replicate double (8 bytes) scalar to be vector 2507 instruct Repl2D(vecX dst, regD src) %{ 2508 predicate(n->as_Vector()->length() == 2); 2509 match(Set dst (ReplicateD src)); 2510 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 2511 ins_encode %{ 2512 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2513 %} 2514 ins_pipe( pipe_slow ); 2515 %} 2516 2517 instruct Repl4D(vecY dst, regD src) %{ 2518 predicate(n->as_Vector()->length() == 4); 2519 match(Set dst (ReplicateD src)); 2520 format %{ "pshufd $dst,$src,0x44\n\t" 2521 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 2522 ins_encode %{ 2523 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2524 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2525 %} 2526 ins_pipe( pipe_slow ); 2527 %} 2528 2529 // Replicate double (8 byte) scalar zero to be vector 2530 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 2531 predicate(n->as_Vector()->length() == 2); 2532 match(Set dst (ReplicateD zero)); 2533 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 2534 ins_encode %{ 2535 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 2536 %} 2537 ins_pipe( fpu_reg_reg ); 2538 %} 2539 2540 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 2541 predicate(n->as_Vector()->length() == 4); 2542 match(Set dst (ReplicateD zero)); 2543 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 2544 ins_encode %{ 2545 bool vector256 = true; 2546 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2547 %} 2548 ins_pipe( fpu_reg_reg ); 2549 %} 2550 2551 // ====================REDUCTION ARITHMETIC======================================= 2552 2553 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 2554 predicate(UseSSE > 2 && UseAVX == 0); 2555 match(Set dst (AddReductionVI src1 src2)); 2556 effect(TEMP tmp2, TEMP tmp); 2557 format %{ "movdqu $tmp2,$src2\n\t" 2558 "phaddd $tmp2,$tmp2\n\t" 2559 "movd $tmp,$src1\n\t" 2560 "paddd $tmp,$tmp2\n\t" 2561 "movd $dst,$tmp\t! add reduction2I" %} 2562 ins_encode %{ 2563 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 2564 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 2565 __ movdl($tmp$$XMMRegister, $src1$$Register); 2566 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 2567 __ movdl($dst$$Register, $tmp$$XMMRegister); 2568 %} 2569 ins_pipe( pipe_slow ); 2570 %} 2571 2572 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 2573 predicate(UseAVX > 0); 2574 match(Set dst (AddReductionVI src1 src2)); 2575 effect(TEMP tmp, TEMP tmp2); 2576 format %{ "vphaddd $tmp,$src2,$src2\n\t" 2577 "movd $tmp2,$src1\n\t" 2578 "vpaddd $tmp2,$tmp2,$tmp\n\t" 2579 "movd $dst,$tmp2\t! add reduction2I" %} 2580 ins_encode %{ 2581 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false); 2582 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2583 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); 2584 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2585 %} 2586 ins_pipe( pipe_slow ); 2587 %} 2588 2589 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 2590 predicate(UseSSE > 2 && UseAVX == 0); 2591 match(Set dst (AddReductionVI src1 src2)); 2592 effect(TEMP tmp2, TEMP tmp); 2593 format %{ "movdqu $tmp2,$src2\n\t" 2594 "phaddd $tmp2,$tmp2\n\t" 2595 "phaddd $tmp2,$tmp2\n\t" 2596 "movd $tmp,$src1\n\t" 2597 "paddd $tmp,$tmp2\n\t" 2598 "movd $dst,$tmp\t! add reduction4I" %} 2599 ins_encode %{ 2600 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 2601 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 2602 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 2603 __ movdl($tmp$$XMMRegister, $src1$$Register); 2604 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 2605 __ movdl($dst$$Register, $tmp$$XMMRegister); 2606 %} 2607 ins_pipe( pipe_slow ); 2608 %} 2609 2610 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 2611 predicate(UseAVX > 0); 2612 match(Set dst (AddReductionVI src1 src2)); 2613 effect(TEMP tmp, TEMP tmp2); 2614 format %{ "vphaddd $tmp,$src2,$src2\n\t" 2615 "vphaddd $tmp,$tmp,$tmp2\n\t" 2616 "movd $tmp2,$src1\n\t" 2617 "vpaddd $tmp2,$tmp2,$tmp\n\t" 2618 "movd $dst,$tmp2\t! add reduction4I" %} 2619 ins_encode %{ 2620 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false); 2621 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2622 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2623 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); 2624 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2625 %} 2626 ins_pipe( pipe_slow ); 2627 %} 2628 2629 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 2630 predicate(UseAVX > 0); 2631 match(Set dst (AddReductionVI src1 src2)); 2632 effect(TEMP tmp, TEMP tmp2); 2633 format %{ "vphaddd $tmp,$src2,$src2\n\t" 2634 "vphaddd $tmp,$tmp,$tmp2\n\t" 2635 "vextractf128 $tmp2,$tmp\n\t" 2636 "vpaddd $tmp,$tmp,$tmp2\n\t" 2637 "movd $tmp2,$src1\n\t" 2638 "vpaddd $tmp2,$tmp2,$tmp\n\t" 2639 "movd $dst,$tmp2\t! add reduction8I" %} 2640 ins_encode %{ 2641 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true); 2642 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true); 2643 __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 2644 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2645 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2646 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); 2647 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2648 %} 2649 ins_pipe( pipe_slow ); 2650 %} 2651 2652 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 2653 predicate(UseSSE >= 1 && UseAVX == 0); 2654 match(Set dst (AddReductionVF src1 src2)); 2655 effect(TEMP tmp, TEMP tmp2); 2656 format %{ "movdqu $tmp,$src1\n\t" 2657 "addss $tmp,$src2\n\t" 2658 "pshufd $tmp2,$src2,0x01\n\t" 2659 "addss $tmp,$tmp2\n\t" 2660 "movdqu $dst,$tmp\t! add reduction2F" %} 2661 ins_encode %{ 2662 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2663 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 2664 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 2665 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2666 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 2667 %} 2668 ins_pipe( pipe_slow ); 2669 %} 2670 2671 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 2672 predicate(UseAVX > 0); 2673 match(Set dst (AddReductionVF src1 src2)); 2674 effect(TEMP tmp2, TEMP tmp); 2675 format %{ "vaddss $tmp2,$src1,$src2\n\t" 2676 "pshufd $tmp,$src2,0x01\n\t" 2677 "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} 2678 ins_encode %{ 2679 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2680 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 2681 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2682 %} 2683 ins_pipe( pipe_slow ); 2684 %} 2685 2686 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 2687 predicate(UseSSE >= 1 && UseAVX == 0); 2688 match(Set dst (AddReductionVF src1 src2)); 2689 effect(TEMP tmp, TEMP tmp2); 2690 format %{ "movdqu $tmp,$src1\n\t" 2691 "addss $tmp,$src2\n\t" 2692 "pshufd $tmp2,$src2,0x01\n\t" 2693 "addss $tmp,$tmp2\n\t" 2694 "pshufd $tmp2,$src2,0x02\n\t" 2695 "addss $tmp,$tmp2\n\t" 2696 "pshufd $tmp2,$src2,0x03\n\t" 2697 "addss $tmp,$tmp2\n\t" 2698 "movdqu $dst,$tmp\t! add reduction4F" %} 2699 ins_encode %{ 2700 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2701 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 2702 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 2703 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2704 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 2705 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2706 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 2707 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2708 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 2709 %} 2710 ins_pipe( pipe_slow ); 2711 %} 2712 2713 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 2714 predicate(UseAVX > 0); 2715 match(Set dst (AddReductionVF src1 src2)); 2716 effect(TEMP tmp, TEMP tmp2); 2717 format %{ "vaddss $tmp2,$src1,$src2\n\t" 2718 "pshufd $tmp,$src2,0x01\n\t" 2719 "vaddss $tmp2,$tmp2,$tmp\n\t" 2720 "pshufd $tmp,$src2,0x02\n\t" 2721 "vaddss $tmp2,$tmp2,$tmp\n\t" 2722 "pshufd $tmp,$src2,0x03\n\t" 2723 "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} 2724 ins_encode %{ 2725 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2726 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 2727 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2728 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 2729 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2730 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 2731 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2732 %} 2733 ins_pipe( pipe_slow ); 2734 %} 2735 2736 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 2737 predicate(UseAVX > 0); 2738 match(Set dst (AddReductionVF src1 src2)); 2739 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 2740 format %{ "vaddss $tmp2,$src1,$src2\n\t" 2741 "pshufd $tmp,$src2,0x01\n\t" 2742 "vaddss $tmp2,$tmp2,$tmp\n\t" 2743 "pshufd $tmp,$src2,0x02\n\t" 2744 "vaddss $tmp2,$tmp2,$tmp\n\t" 2745 "pshufd $tmp,$src2,0x03\n\t" 2746 "vaddss $tmp2,$tmp2,$tmp\n\t" 2747 "vextractf128 $tmp3,$src2\n\t" 2748 "vaddss $tmp2,$tmp2,$tmp3\n\t" 2749 "pshufd $tmp,$tmp3,0x01\n\t" 2750 "vaddss $tmp2,$tmp2,$tmp\n\t" 2751 "pshufd $tmp,$tmp3,0x02\n\t" 2752 "vaddss $tmp2,$tmp2,$tmp\n\t" 2753 "pshufd $tmp,$tmp3,0x03\n\t" 2754 "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %} 2755 ins_encode %{ 2756 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2757 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 2758 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2759 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 2760 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2761 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 2762 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2763 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 2764 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 2765 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 2766 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2767 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 2768 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2769 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 2770 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2771 %} 2772 ins_pipe( pipe_slow ); 2773 %} 2774 2775 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 2776 predicate(UseSSE >= 1 && UseAVX == 0); 2777 match(Set dst (AddReductionVD src1 src2)); 2778 effect(TEMP tmp, TEMP dst); 2779 format %{ "movdqu $tmp,$src1\n\t" 2780 "addsd $tmp,$src2\n\t" 2781 "pshufd $dst,$src2,0xE\n\t" 2782 "addsd $dst,$tmp\t! add reduction2D" %} 2783 ins_encode %{ 2784 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2785 __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); 2786 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 2787 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 2788 %} 2789 ins_pipe( pipe_slow ); 2790 %} 2791 2792 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 2793 predicate(UseAVX > 0); 2794 match(Set dst (AddReductionVD src1 src2)); 2795 effect(TEMP tmp, TEMP tmp2); 2796 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 2797 "pshufd $tmp,$src2,0xE\n\t" 2798 "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %} 2799 ins_encode %{ 2800 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2801 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 2802 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2803 %} 2804 ins_pipe( pipe_slow ); 2805 %} 2806 2807 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 2808 predicate(UseAVX > 0); 2809 match(Set dst (AddReductionVD src1 src2)); 2810 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 2811 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 2812 "pshufd $tmp,$src2,0xE\n\t" 2813 "vaddsd $tmp2,$tmp2,$tmp\n\t" 2814 "vextractf128 $tmp3,$src2\n\t" 2815 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 2816 "pshufd $tmp,$tmp3,0xE\n\t" 2817 "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} 2818 ins_encode %{ 2819 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2820 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 2821 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2822 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 2823 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 2824 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 2825 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2826 %} 2827 ins_pipe( pipe_slow ); 2828 %} 2829 2830 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 2831 predicate(UseSSE > 3 && UseAVX == 0); 2832 match(Set dst (MulReductionVI src1 src2)); 2833 effect(TEMP tmp, TEMP tmp2); 2834 format %{ "pshufd $tmp2,$src2,0x1\n\t" 2835 "pmulld $tmp2,$src2\n\t" 2836 "movd $tmp,$src1\n\t" 2837 "pmulld $tmp2,$tmp\n\t" 2838 "movd $dst,$tmp2\t! mul reduction2I" %} 2839 ins_encode %{ 2840 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 2841 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 2842 __ movdl($tmp$$XMMRegister, $src1$$Register); 2843 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 2844 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2845 %} 2846 ins_pipe( pipe_slow ); 2847 %} 2848 2849 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 2850 predicate(UseAVX > 0); 2851 match(Set dst (MulReductionVI src1 src2)); 2852 effect(TEMP tmp, TEMP tmp2); 2853 format %{ "pshufd $tmp2,$src2,0x1\n\t" 2854 "vpmulld $tmp,$src2,$tmp2\n\t" 2855 "movd $tmp2,$src1\n\t" 2856 "vpmulld $tmp2,$tmp,$tmp2\n\t" 2857 "movd $dst,$tmp2\t! mul reduction2I" %} 2858 ins_encode %{ 2859 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 2860 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false); 2861 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2862 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2863 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2864 %} 2865 ins_pipe( pipe_slow ); 2866 %} 2867 2868 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 2869 predicate(UseSSE > 3 && UseAVX == 0); 2870 match(Set dst (MulReductionVI src1 src2)); 2871 effect(TEMP tmp, TEMP tmp2); 2872 format %{ "pshufd $tmp2,$src2,0xE\n\t" 2873 "pmulld $tmp2,$src2\n\t" 2874 "pshufd $tmp,$tmp2,0x1\n\t" 2875 "pmulld $tmp2,$tmp\n\t" 2876 "movd $tmp,$src1\n\t" 2877 "pmulld $tmp2,$tmp\n\t" 2878 "movd $dst,$tmp2\t! mul reduction4I" %} 2879 ins_encode %{ 2880 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 2881 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 2882 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 2883 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 2884 __ movdl($tmp$$XMMRegister, $src1$$Register); 2885 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 2886 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2887 %} 2888 ins_pipe( pipe_slow ); 2889 %} 2890 2891 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 2892 predicate(UseAVX > 0); 2893 match(Set dst (MulReductionVI src1 src2)); 2894 effect(TEMP tmp, TEMP tmp2); 2895 format %{ "pshufd $tmp2,$src2,0xE\n\t" 2896 "vpmulld $tmp,$src2,$tmp2\n\t" 2897 "pshufd $tmp2,$tmp,0x1\n\t" 2898 "vpmulld $tmp,$tmp,$tmp2\n\t" 2899 "movd $tmp2,$src1\n\t" 2900 "vpmulld $tmp2,$tmp,$tmp2\n\t" 2901 "movd $dst,$tmp2\t! mul reduction4I" %} 2902 ins_encode %{ 2903 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 2904 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false); 2905 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 2906 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2907 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2908 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2909 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2910 %} 2911 ins_pipe( pipe_slow ); 2912 %} 2913 2914 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 2915 predicate(UseAVX > 0); 2916 match(Set dst (MulReductionVI src1 src2)); 2917 effect(TEMP tmp, TEMP tmp2); 2918 format %{ "vextractf128 $tmp,$src2\n\t" 2919 "vpmulld $tmp,$tmp,$src2\n\t" 2920 "pshufd $tmp2,$tmp,0xE\n\t" 2921 "vpmulld $tmp,$tmp,$tmp2\n\t" 2922 "pshufd $tmp2,$tmp,0x1\n\t" 2923 "vpmulld $tmp,$tmp,$tmp2\n\t" 2924 "movd $tmp2,$src1\n\t" 2925 "vpmulld $tmp2,$tmp,$tmp2\n\t" 2926 "movd $dst,$tmp2\t! mul reduction8I" %} 2927 ins_encode %{ 2928 __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister); 2929 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false); 2930 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 2931 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2932 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 2933 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2934 __ movdl($tmp2$$XMMRegister, $src1$$Register); 2935 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); 2936 __ movdl($dst$$Register, $tmp2$$XMMRegister); 2937 %} 2938 ins_pipe( pipe_slow ); 2939 %} 2940 2941 instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 2942 predicate(UseSSE >= 1 && UseAVX == 0); 2943 match(Set dst (MulReductionVF src1 src2)); 2944 effect(TEMP tmp, TEMP tmp2); 2945 format %{ "movdqu $tmp,$src1\n\t" 2946 "mulss $tmp,$src2\n\t" 2947 "pshufd $tmp2,$src2,0x01\n\t" 2948 "mulss $tmp,$tmp2\n\t" 2949 "movdqu $dst,$tmp\t! add reduction2F" %} 2950 ins_encode %{ 2951 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2952 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 2953 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 2954 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2955 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 2956 %} 2957 ins_pipe( pipe_slow ); 2958 %} 2959 2960 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 2961 predicate(UseAVX > 0); 2962 match(Set dst (MulReductionVF src1 src2)); 2963 effect(TEMP tmp, TEMP tmp2); 2964 format %{ "vmulss $tmp2,$src1,$src2\n\t" 2965 "pshufd $tmp,$src2,0x01\n\t" 2966 "vmulss $dst,$tmp2,$tmp\t! add reduction2F" %} 2967 ins_encode %{ 2968 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2969 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 2970 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 2971 %} 2972 ins_pipe( pipe_slow ); 2973 %} 2974 2975 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 2976 predicate(UseSSE >= 1 && UseAVX == 0); 2977 match(Set dst (MulReductionVF src1 src2)); 2978 effect(TEMP tmp, TEMP tmp2); 2979 format %{ "movdqu $tmp,$src1\n\t" 2980 "mulss $tmp,$src2\n\t" 2981 "pshufd $tmp2,$src2,0x01\n\t" 2982 "mulss $tmp,$tmp2\n\t" 2983 "pshufd $tmp2,$src2,0x02\n\t" 2984 "mulss $tmp,$tmp2\n\t" 2985 "pshufd $tmp2,$src2,0x03\n\t" 2986 "mulss $tmp,$tmp2\n\t" 2987 "movdqu $dst,$tmp\t! add reduction4F" %} 2988 ins_encode %{ 2989 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 2990 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 2991 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 2992 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2993 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 2994 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2995 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 2996 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 2997 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 2998 %} 2999 ins_pipe( pipe_slow ); 3000 %} 3001 3002 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 3003 predicate(UseAVX > 0); 3004 match(Set dst (MulReductionVF src1 src2)); 3005 effect(TEMP tmp, TEMP tmp2); 3006 format %{ "vmulss $tmp2,$src1,$src2\n\t" 3007 "pshufd $tmp,$src2,0x01\n\t" 3008 "vmulss $tmp2,$tmp2,$tmp\n\t" 3009 "pshufd $tmp,$src2,0x02\n\t" 3010 "vmulss $tmp2,$tmp2,$tmp\n\t" 3011 "pshufd $tmp,$src2,0x03\n\t" 3012 "vmulss $dst,$tmp2,$tmp\t! add reduction4F" %} 3013 ins_encode %{ 3014 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3015 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 3016 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3017 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 3018 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3019 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 3020 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3021 %} 3022 ins_pipe( pipe_slow ); 3023 %} 3024 3025 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 3026 predicate(UseAVX > 0); 3027 match(Set dst (MulReductionVF src1 src2)); 3028 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 3029 format %{ "vmulss $tmp2,$src1,$src2\n\t" 3030 "pshufd $tmp,$src2,0x01\n\t" 3031 "vmulss $tmp2,$tmp2,$tmp\n\t" 3032 "pshufd $tmp,$src2,0x02\n\t" 3033 "vmulss $tmp2,$tmp2,$tmp\n\t" 3034 "pshufd $tmp,$src2,0x03\n\t" 3035 "vmulss $tmp2,$tmp2,$tmp\n\t" 3036 "vextractf128 $tmp3,$src2\n\t" 3037 "vmulss $tmp2,$tmp2,$tmp3\n\t" 3038 "pshufd $tmp,$tmp3,0x01\n\t" 3039 "vmulss $tmp2,$tmp2,$tmp\n\t" 3040 "pshufd $tmp,$tmp3,0x02\n\t" 3041 "vmulss $tmp2,$tmp2,$tmp\n\t" 3042 "pshufd $tmp,$tmp3,0x03\n\t" 3043 "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %} 3044 ins_encode %{ 3045 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3046 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 3047 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3048 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 3049 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3050 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 3051 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3052 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 3053 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 3054 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 3055 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3056 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 3057 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3058 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 3059 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3060 %} 3061 ins_pipe( pipe_slow ); 3062 %} 3063 3064 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 3065 predicate(UseSSE >= 1 && UseAVX == 0); 3066 match(Set dst (MulReductionVD src1 src2)); 3067 effect(TEMP tmp, TEMP dst); 3068 format %{ "movdqu $tmp,$src1\n\t" 3069 "mulsd $tmp,$src2\n\t" 3070 "pshufd $dst,$src2,0xE\n\t" 3071 "mulsd $dst,$tmp\t! add reduction2D" %} 3072 ins_encode %{ 3073 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 3074 __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); 3075 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 3076 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 3077 %} 3078 ins_pipe( pipe_slow ); 3079 %} 3080 3081 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 3082 predicate(UseAVX > 0); 3083 match(Set dst (MulReductionVD src1 src2)); 3084 effect(TEMP tmp, TEMP tmp2); 3085 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 3086 "pshufd $tmp,$src2,0xE\n\t" 3087 "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %} 3088 ins_encode %{ 3089 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3090 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 3091 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3092 %} 3093 ins_pipe( pipe_slow ); 3094 %} 3095 3096 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 3097 predicate(UseAVX > 0); 3098 match(Set dst (MulReductionVD src1 src2)); 3099 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 3100 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 3101 "pshufd $tmp,$src2,0xE\n\t" 3102 "vmulsd $tmp2,$tmp2,$tmp\n\t" 3103 "vextractf128 $tmp3,$src2\n\t" 3104 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 3105 "pshufd $tmp,$tmp3,0xE\n\t" 3106 "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} 3107 ins_encode %{ 3108 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3109 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 3110 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3111 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 3112 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 3113 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 3114 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 3115 %} 3116 ins_pipe( pipe_slow ); 3117 %} 3118 3119 // ====================VECTOR ARITHMETIC======================================= 3120 3121 // --------------------------------- ADD -------------------------------------- 3122 3123 // Bytes vector add 3124 instruct vadd4B(vecS dst, vecS src) %{ 3125 predicate(n->as_Vector()->length() == 4); 3126 match(Set dst (AddVB dst src)); 3127 format %{ "paddb $dst,$src\t! add packed4B" %} 3128 ins_encode %{ 3129 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3130 %} 3131 ins_pipe( pipe_slow ); 3132 %} 3133 3134 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 3135 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3136 match(Set dst (AddVB src1 src2)); 3137 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 3138 ins_encode %{ 3139 bool vector256 = false; 3140 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3141 %} 3142 ins_pipe( pipe_slow ); 3143 %} 3144 3145 instruct vadd8B(vecD dst, vecD src) %{ 3146 predicate(n->as_Vector()->length() == 8); 3147 match(Set dst (AddVB dst src)); 3148 format %{ "paddb $dst,$src\t! add packed8B" %} 3149 ins_encode %{ 3150 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3151 %} 3152 ins_pipe( pipe_slow ); 3153 %} 3154 3155 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 3156 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3157 match(Set dst (AddVB src1 src2)); 3158 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 3159 ins_encode %{ 3160 bool vector256 = false; 3161 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3162 %} 3163 ins_pipe( pipe_slow ); 3164 %} 3165 3166 instruct vadd16B(vecX dst, vecX src) %{ 3167 predicate(n->as_Vector()->length() == 16); 3168 match(Set dst (AddVB dst src)); 3169 format %{ "paddb $dst,$src\t! add packed16B" %} 3170 ins_encode %{ 3171 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 3172 %} 3173 ins_pipe( pipe_slow ); 3174 %} 3175 3176 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 3177 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 3178 match(Set dst (AddVB src1 src2)); 3179 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 3180 ins_encode %{ 3181 bool vector256 = false; 3182 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3183 %} 3184 ins_pipe( pipe_slow ); 3185 %} 3186 3187 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 3188 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 3189 match(Set dst (AddVB src (LoadVector mem))); 3190 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 3191 ins_encode %{ 3192 bool vector256 = false; 3193 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3194 %} 3195 ins_pipe( pipe_slow ); 3196 %} 3197 3198 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 3199 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 3200 match(Set dst (AddVB src1 src2)); 3201 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 3202 ins_encode %{ 3203 bool vector256 = true; 3204 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3205 %} 3206 ins_pipe( pipe_slow ); 3207 %} 3208 3209 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 3210 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 3211 match(Set dst (AddVB src (LoadVector mem))); 3212 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 3213 ins_encode %{ 3214 bool vector256 = true; 3215 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3216 %} 3217 ins_pipe( pipe_slow ); 3218 %} 3219 3220 // Shorts/Chars vector add 3221 instruct vadd2S(vecS dst, vecS src) %{ 3222 predicate(n->as_Vector()->length() == 2); 3223 match(Set dst (AddVS dst src)); 3224 format %{ "paddw $dst,$src\t! add packed2S" %} 3225 ins_encode %{ 3226 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 3227 %} 3228 ins_pipe( pipe_slow ); 3229 %} 3230 3231 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 3232 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3233 match(Set dst (AddVS src1 src2)); 3234 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 3235 ins_encode %{ 3236 bool vector256 = false; 3237 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3238 %} 3239 ins_pipe( pipe_slow ); 3240 %} 3241 3242 instruct vadd4S(vecD dst, vecD src) %{ 3243 predicate(n->as_Vector()->length() == 4); 3244 match(Set dst (AddVS dst src)); 3245 format %{ "paddw $dst,$src\t! add packed4S" %} 3246 ins_encode %{ 3247 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 3248 %} 3249 ins_pipe( pipe_slow ); 3250 %} 3251 3252 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 3253 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3254 match(Set dst (AddVS src1 src2)); 3255 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 3256 ins_encode %{ 3257 bool vector256 = false; 3258 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3259 %} 3260 ins_pipe( pipe_slow ); 3261 %} 3262 3263 instruct vadd8S(vecX dst, vecX src) %{ 3264 predicate(n->as_Vector()->length() == 8); 3265 match(Set dst (AddVS dst src)); 3266 format %{ "paddw $dst,$src\t! add packed8S" %} 3267 ins_encode %{ 3268 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 3269 %} 3270 ins_pipe( pipe_slow ); 3271 %} 3272 3273 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 3274 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3275 match(Set dst (AddVS src1 src2)); 3276 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 3277 ins_encode %{ 3278 bool vector256 = false; 3279 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3280 %} 3281 ins_pipe( pipe_slow ); 3282 %} 3283 3284 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 3285 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3286 match(Set dst (AddVS src (LoadVector mem))); 3287 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 3288 ins_encode %{ 3289 bool vector256 = false; 3290 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3291 %} 3292 ins_pipe( pipe_slow ); 3293 %} 3294 3295 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 3296 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3297 match(Set dst (AddVS src1 src2)); 3298 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 3299 ins_encode %{ 3300 bool vector256 = true; 3301 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3302 %} 3303 ins_pipe( pipe_slow ); 3304 %} 3305 3306 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 3307 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3308 match(Set dst (AddVS src (LoadVector mem))); 3309 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 3310 ins_encode %{ 3311 bool vector256 = true; 3312 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3313 %} 3314 ins_pipe( pipe_slow ); 3315 %} 3316 3317 // Integers vector add 3318 instruct vadd2I(vecD dst, vecD src) %{ 3319 predicate(n->as_Vector()->length() == 2); 3320 match(Set dst (AddVI dst src)); 3321 format %{ "paddd $dst,$src\t! add packed2I" %} 3322 ins_encode %{ 3323 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 3324 %} 3325 ins_pipe( pipe_slow ); 3326 %} 3327 3328 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 3329 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3330 match(Set dst (AddVI src1 src2)); 3331 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 3332 ins_encode %{ 3333 bool vector256 = false; 3334 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3335 %} 3336 ins_pipe( pipe_slow ); 3337 %} 3338 3339 instruct vadd4I(vecX dst, vecX src) %{ 3340 predicate(n->as_Vector()->length() == 4); 3341 match(Set dst (AddVI dst src)); 3342 format %{ "paddd $dst,$src\t! add packed4I" %} 3343 ins_encode %{ 3344 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 3345 %} 3346 ins_pipe( pipe_slow ); 3347 %} 3348 3349 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 3350 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3351 match(Set dst (AddVI src1 src2)); 3352 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 3353 ins_encode %{ 3354 bool vector256 = false; 3355 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3356 %} 3357 ins_pipe( pipe_slow ); 3358 %} 3359 3360 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 3361 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3362 match(Set dst (AddVI src (LoadVector mem))); 3363 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 3364 ins_encode %{ 3365 bool vector256 = false; 3366 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3367 %} 3368 ins_pipe( pipe_slow ); 3369 %} 3370 3371 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 3372 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3373 match(Set dst (AddVI src1 src2)); 3374 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 3375 ins_encode %{ 3376 bool vector256 = true; 3377 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3378 %} 3379 ins_pipe( pipe_slow ); 3380 %} 3381 3382 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 3383 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3384 match(Set dst (AddVI src (LoadVector mem))); 3385 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 3386 ins_encode %{ 3387 bool vector256 = true; 3388 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3389 %} 3390 ins_pipe( pipe_slow ); 3391 %} 3392 3393 // Longs vector add 3394 instruct vadd2L(vecX dst, vecX src) %{ 3395 predicate(n->as_Vector()->length() == 2); 3396 match(Set dst (AddVL dst src)); 3397 format %{ "paddq $dst,$src\t! add packed2L" %} 3398 ins_encode %{ 3399 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 3400 %} 3401 ins_pipe( pipe_slow ); 3402 %} 3403 3404 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 3405 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3406 match(Set dst (AddVL src1 src2)); 3407 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 3408 ins_encode %{ 3409 bool vector256 = false; 3410 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3411 %} 3412 ins_pipe( pipe_slow ); 3413 %} 3414 3415 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 3416 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3417 match(Set dst (AddVL src (LoadVector mem))); 3418 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 3419 ins_encode %{ 3420 bool vector256 = false; 3421 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3422 %} 3423 ins_pipe( pipe_slow ); 3424 %} 3425 3426 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 3427 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3428 match(Set dst (AddVL src1 src2)); 3429 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 3430 ins_encode %{ 3431 bool vector256 = true; 3432 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3433 %} 3434 ins_pipe( pipe_slow ); 3435 %} 3436 3437 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 3438 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3439 match(Set dst (AddVL src (LoadVector mem))); 3440 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 3441 ins_encode %{ 3442 bool vector256 = true; 3443 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3444 %} 3445 ins_pipe( pipe_slow ); 3446 %} 3447 3448 // Floats vector add 3449 instruct vadd2F(vecD dst, vecD src) %{ 3450 predicate(n->as_Vector()->length() == 2); 3451 match(Set dst (AddVF dst src)); 3452 format %{ "addps $dst,$src\t! add packed2F" %} 3453 ins_encode %{ 3454 __ addps($dst$$XMMRegister, $src$$XMMRegister); 3455 %} 3456 ins_pipe( pipe_slow ); 3457 %} 3458 3459 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 3460 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3461 match(Set dst (AddVF src1 src2)); 3462 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 3463 ins_encode %{ 3464 bool vector256 = false; 3465 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3466 %} 3467 ins_pipe( pipe_slow ); 3468 %} 3469 3470 instruct vadd4F(vecX dst, vecX src) %{ 3471 predicate(n->as_Vector()->length() == 4); 3472 match(Set dst (AddVF dst src)); 3473 format %{ "addps $dst,$src\t! add packed4F" %} 3474 ins_encode %{ 3475 __ addps($dst$$XMMRegister, $src$$XMMRegister); 3476 %} 3477 ins_pipe( pipe_slow ); 3478 %} 3479 3480 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 3481 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3482 match(Set dst (AddVF src1 src2)); 3483 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 3484 ins_encode %{ 3485 bool vector256 = false; 3486 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3487 %} 3488 ins_pipe( pipe_slow ); 3489 %} 3490 3491 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 3492 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3493 match(Set dst (AddVF src (LoadVector mem))); 3494 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 3495 ins_encode %{ 3496 bool vector256 = false; 3497 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3498 %} 3499 ins_pipe( pipe_slow ); 3500 %} 3501 3502 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 3503 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3504 match(Set dst (AddVF src1 src2)); 3505 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 3506 ins_encode %{ 3507 bool vector256 = true; 3508 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3509 %} 3510 ins_pipe( pipe_slow ); 3511 %} 3512 3513 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 3514 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3515 match(Set dst (AddVF src (LoadVector mem))); 3516 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 3517 ins_encode %{ 3518 bool vector256 = true; 3519 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3520 %} 3521 ins_pipe( pipe_slow ); 3522 %} 3523 3524 // Doubles vector add 3525 instruct vadd2D(vecX dst, vecX src) %{ 3526 predicate(n->as_Vector()->length() == 2); 3527 match(Set dst (AddVD dst src)); 3528 format %{ "addpd $dst,$src\t! add packed2D" %} 3529 ins_encode %{ 3530 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 3531 %} 3532 ins_pipe( pipe_slow ); 3533 %} 3534 3535 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 3536 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3537 match(Set dst (AddVD src1 src2)); 3538 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 3539 ins_encode %{ 3540 bool vector256 = false; 3541 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3542 %} 3543 ins_pipe( pipe_slow ); 3544 %} 3545 3546 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 3547 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3548 match(Set dst (AddVD src (LoadVector mem))); 3549 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 3550 ins_encode %{ 3551 bool vector256 = false; 3552 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3553 %} 3554 ins_pipe( pipe_slow ); 3555 %} 3556 3557 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 3558 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3559 match(Set dst (AddVD src1 src2)); 3560 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 3561 ins_encode %{ 3562 bool vector256 = true; 3563 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3564 %} 3565 ins_pipe( pipe_slow ); 3566 %} 3567 3568 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 3569 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3570 match(Set dst (AddVD src (LoadVector mem))); 3571 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 3572 ins_encode %{ 3573 bool vector256 = true; 3574 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3575 %} 3576 ins_pipe( pipe_slow ); 3577 %} 3578 3579 // --------------------------------- SUB -------------------------------------- 3580 3581 // Bytes vector sub 3582 instruct vsub4B(vecS dst, vecS src) %{ 3583 predicate(n->as_Vector()->length() == 4); 3584 match(Set dst (SubVB dst src)); 3585 format %{ "psubb $dst,$src\t! sub packed4B" %} 3586 ins_encode %{ 3587 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 3588 %} 3589 ins_pipe( pipe_slow ); 3590 %} 3591 3592 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 3593 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3594 match(Set dst (SubVB src1 src2)); 3595 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 3596 ins_encode %{ 3597 bool vector256 = false; 3598 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3599 %} 3600 ins_pipe( pipe_slow ); 3601 %} 3602 3603 instruct vsub8B(vecD dst, vecD src) %{ 3604 predicate(n->as_Vector()->length() == 8); 3605 match(Set dst (SubVB dst src)); 3606 format %{ "psubb $dst,$src\t! sub packed8B" %} 3607 ins_encode %{ 3608 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 3609 %} 3610 ins_pipe( pipe_slow ); 3611 %} 3612 3613 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 3614 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3615 match(Set dst (SubVB src1 src2)); 3616 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 3617 ins_encode %{ 3618 bool vector256 = false; 3619 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3620 %} 3621 ins_pipe( pipe_slow ); 3622 %} 3623 3624 instruct vsub16B(vecX dst, vecX src) %{ 3625 predicate(n->as_Vector()->length() == 16); 3626 match(Set dst (SubVB dst src)); 3627 format %{ "psubb $dst,$src\t! sub packed16B" %} 3628 ins_encode %{ 3629 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 3630 %} 3631 ins_pipe( pipe_slow ); 3632 %} 3633 3634 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 3635 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 3636 match(Set dst (SubVB src1 src2)); 3637 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 3638 ins_encode %{ 3639 bool vector256 = false; 3640 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3641 %} 3642 ins_pipe( pipe_slow ); 3643 %} 3644 3645 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 3646 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 3647 match(Set dst (SubVB src (LoadVector mem))); 3648 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 3649 ins_encode %{ 3650 bool vector256 = false; 3651 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3652 %} 3653 ins_pipe( pipe_slow ); 3654 %} 3655 3656 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 3657 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 3658 match(Set dst (SubVB src1 src2)); 3659 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 3660 ins_encode %{ 3661 bool vector256 = true; 3662 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3663 %} 3664 ins_pipe( pipe_slow ); 3665 %} 3666 3667 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 3668 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 3669 match(Set dst (SubVB src (LoadVector mem))); 3670 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 3671 ins_encode %{ 3672 bool vector256 = true; 3673 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3674 %} 3675 ins_pipe( pipe_slow ); 3676 %} 3677 3678 // Shorts/Chars vector sub 3679 instruct vsub2S(vecS dst, vecS src) %{ 3680 predicate(n->as_Vector()->length() == 2); 3681 match(Set dst (SubVS dst src)); 3682 format %{ "psubw $dst,$src\t! sub packed2S" %} 3683 ins_encode %{ 3684 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 3685 %} 3686 ins_pipe( pipe_slow ); 3687 %} 3688 3689 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 3690 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3691 match(Set dst (SubVS src1 src2)); 3692 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 3693 ins_encode %{ 3694 bool vector256 = false; 3695 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3696 %} 3697 ins_pipe( pipe_slow ); 3698 %} 3699 3700 instruct vsub4S(vecD dst, vecD src) %{ 3701 predicate(n->as_Vector()->length() == 4); 3702 match(Set dst (SubVS dst src)); 3703 format %{ "psubw $dst,$src\t! sub packed4S" %} 3704 ins_encode %{ 3705 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 3706 %} 3707 ins_pipe( pipe_slow ); 3708 %} 3709 3710 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 3711 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3712 match(Set dst (SubVS src1 src2)); 3713 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 3714 ins_encode %{ 3715 bool vector256 = false; 3716 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3717 %} 3718 ins_pipe( pipe_slow ); 3719 %} 3720 3721 instruct vsub8S(vecX dst, vecX src) %{ 3722 predicate(n->as_Vector()->length() == 8); 3723 match(Set dst (SubVS dst src)); 3724 format %{ "psubw $dst,$src\t! sub packed8S" %} 3725 ins_encode %{ 3726 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 3727 %} 3728 ins_pipe( pipe_slow ); 3729 %} 3730 3731 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 3732 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3733 match(Set dst (SubVS src1 src2)); 3734 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 3735 ins_encode %{ 3736 bool vector256 = false; 3737 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3738 %} 3739 ins_pipe( pipe_slow ); 3740 %} 3741 3742 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 3743 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3744 match(Set dst (SubVS src (LoadVector mem))); 3745 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 3746 ins_encode %{ 3747 bool vector256 = false; 3748 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3749 %} 3750 ins_pipe( pipe_slow ); 3751 %} 3752 3753 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 3754 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3755 match(Set dst (SubVS src1 src2)); 3756 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 3757 ins_encode %{ 3758 bool vector256 = true; 3759 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3760 %} 3761 ins_pipe( pipe_slow ); 3762 %} 3763 3764 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 3765 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3766 match(Set dst (SubVS src (LoadVector mem))); 3767 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 3768 ins_encode %{ 3769 bool vector256 = true; 3770 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3771 %} 3772 ins_pipe( pipe_slow ); 3773 %} 3774 3775 // Integers vector sub 3776 instruct vsub2I(vecD dst, vecD src) %{ 3777 predicate(n->as_Vector()->length() == 2); 3778 match(Set dst (SubVI dst src)); 3779 format %{ "psubd $dst,$src\t! sub packed2I" %} 3780 ins_encode %{ 3781 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 3782 %} 3783 ins_pipe( pipe_slow ); 3784 %} 3785 3786 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 3787 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3788 match(Set dst (SubVI src1 src2)); 3789 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 3790 ins_encode %{ 3791 bool vector256 = false; 3792 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3793 %} 3794 ins_pipe( pipe_slow ); 3795 %} 3796 3797 instruct vsub4I(vecX dst, vecX src) %{ 3798 predicate(n->as_Vector()->length() == 4); 3799 match(Set dst (SubVI dst src)); 3800 format %{ "psubd $dst,$src\t! sub packed4I" %} 3801 ins_encode %{ 3802 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 3803 %} 3804 ins_pipe( pipe_slow ); 3805 %} 3806 3807 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 3808 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3809 match(Set dst (SubVI src1 src2)); 3810 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 3811 ins_encode %{ 3812 bool vector256 = false; 3813 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3814 %} 3815 ins_pipe( pipe_slow ); 3816 %} 3817 3818 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 3819 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3820 match(Set dst (SubVI src (LoadVector mem))); 3821 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 3822 ins_encode %{ 3823 bool vector256 = false; 3824 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3825 %} 3826 ins_pipe( pipe_slow ); 3827 %} 3828 3829 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 3830 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3831 match(Set dst (SubVI src1 src2)); 3832 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 3833 ins_encode %{ 3834 bool vector256 = true; 3835 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3836 %} 3837 ins_pipe( pipe_slow ); 3838 %} 3839 3840 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 3841 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3842 match(Set dst (SubVI src (LoadVector mem))); 3843 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 3844 ins_encode %{ 3845 bool vector256 = true; 3846 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3847 %} 3848 ins_pipe( pipe_slow ); 3849 %} 3850 3851 // Longs vector sub 3852 instruct vsub2L(vecX dst, vecX src) %{ 3853 predicate(n->as_Vector()->length() == 2); 3854 match(Set dst (SubVL dst src)); 3855 format %{ "psubq $dst,$src\t! sub packed2L" %} 3856 ins_encode %{ 3857 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 3858 %} 3859 ins_pipe( pipe_slow ); 3860 %} 3861 3862 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 3863 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3864 match(Set dst (SubVL src1 src2)); 3865 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 3866 ins_encode %{ 3867 bool vector256 = false; 3868 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3869 %} 3870 ins_pipe( pipe_slow ); 3871 %} 3872 3873 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 3874 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3875 match(Set dst (SubVL src (LoadVector mem))); 3876 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 3877 ins_encode %{ 3878 bool vector256 = false; 3879 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3880 %} 3881 ins_pipe( pipe_slow ); 3882 %} 3883 3884 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 3885 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3886 match(Set dst (SubVL src1 src2)); 3887 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 3888 ins_encode %{ 3889 bool vector256 = true; 3890 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3891 %} 3892 ins_pipe( pipe_slow ); 3893 %} 3894 3895 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 3896 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3897 match(Set dst (SubVL src (LoadVector mem))); 3898 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 3899 ins_encode %{ 3900 bool vector256 = true; 3901 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3902 %} 3903 ins_pipe( pipe_slow ); 3904 %} 3905 3906 // Floats vector sub 3907 instruct vsub2F(vecD dst, vecD src) %{ 3908 predicate(n->as_Vector()->length() == 2); 3909 match(Set dst (SubVF dst src)); 3910 format %{ "subps $dst,$src\t! sub packed2F" %} 3911 ins_encode %{ 3912 __ subps($dst$$XMMRegister, $src$$XMMRegister); 3913 %} 3914 ins_pipe( pipe_slow ); 3915 %} 3916 3917 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 3918 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3919 match(Set dst (SubVF src1 src2)); 3920 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 3921 ins_encode %{ 3922 bool vector256 = false; 3923 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3924 %} 3925 ins_pipe( pipe_slow ); 3926 %} 3927 3928 instruct vsub4F(vecX dst, vecX src) %{ 3929 predicate(n->as_Vector()->length() == 4); 3930 match(Set dst (SubVF dst src)); 3931 format %{ "subps $dst,$src\t! sub packed4F" %} 3932 ins_encode %{ 3933 __ subps($dst$$XMMRegister, $src$$XMMRegister); 3934 %} 3935 ins_pipe( pipe_slow ); 3936 %} 3937 3938 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 3939 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3940 match(Set dst (SubVF src1 src2)); 3941 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 3942 ins_encode %{ 3943 bool vector256 = false; 3944 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3945 %} 3946 ins_pipe( pipe_slow ); 3947 %} 3948 3949 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 3950 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3951 match(Set dst (SubVF src (LoadVector mem))); 3952 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 3953 ins_encode %{ 3954 bool vector256 = false; 3955 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3956 %} 3957 ins_pipe( pipe_slow ); 3958 %} 3959 3960 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 3961 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3962 match(Set dst (SubVF src1 src2)); 3963 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 3964 ins_encode %{ 3965 bool vector256 = true; 3966 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3967 %} 3968 ins_pipe( pipe_slow ); 3969 %} 3970 3971 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 3972 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3973 match(Set dst (SubVF src (LoadVector mem))); 3974 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 3975 ins_encode %{ 3976 bool vector256 = true; 3977 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3978 %} 3979 ins_pipe( pipe_slow ); 3980 %} 3981 3982 // Doubles vector sub 3983 instruct vsub2D(vecX dst, vecX src) %{ 3984 predicate(n->as_Vector()->length() == 2); 3985 match(Set dst (SubVD dst src)); 3986 format %{ "subpd $dst,$src\t! sub packed2D" %} 3987 ins_encode %{ 3988 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 3989 %} 3990 ins_pipe( pipe_slow ); 3991 %} 3992 3993 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 3994 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3995 match(Set dst (SubVD src1 src2)); 3996 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 3997 ins_encode %{ 3998 bool vector256 = false; 3999 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4000 %} 4001 ins_pipe( pipe_slow ); 4002 %} 4003 4004 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 4005 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4006 match(Set dst (SubVD src (LoadVector mem))); 4007 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 4008 ins_encode %{ 4009 bool vector256 = false; 4010 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4011 %} 4012 ins_pipe( pipe_slow ); 4013 %} 4014 4015 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 4016 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4017 match(Set dst (SubVD src1 src2)); 4018 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 4019 ins_encode %{ 4020 bool vector256 = true; 4021 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4022 %} 4023 ins_pipe( pipe_slow ); 4024 %} 4025 4026 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 4027 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4028 match(Set dst (SubVD src (LoadVector mem))); 4029 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 4030 ins_encode %{ 4031 bool vector256 = true; 4032 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4033 %} 4034 ins_pipe( pipe_slow ); 4035 %} 4036 4037 // --------------------------------- MUL -------------------------------------- 4038 4039 // Shorts/Chars vector mul 4040 instruct vmul2S(vecS dst, vecS src) %{ 4041 predicate(n->as_Vector()->length() == 2); 4042 match(Set dst (MulVS dst src)); 4043 format %{ "pmullw $dst,$src\t! mul packed2S" %} 4044 ins_encode %{ 4045 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4046 %} 4047 ins_pipe( pipe_slow ); 4048 %} 4049 4050 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 4051 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4052 match(Set dst (MulVS src1 src2)); 4053 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 4054 ins_encode %{ 4055 bool vector256 = false; 4056 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4057 %} 4058 ins_pipe( pipe_slow ); 4059 %} 4060 4061 instruct vmul4S(vecD dst, vecD src) %{ 4062 predicate(n->as_Vector()->length() == 4); 4063 match(Set dst (MulVS dst src)); 4064 format %{ "pmullw $dst,$src\t! mul packed4S" %} 4065 ins_encode %{ 4066 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4067 %} 4068 ins_pipe( pipe_slow ); 4069 %} 4070 4071 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 4072 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4073 match(Set dst (MulVS src1 src2)); 4074 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 4075 ins_encode %{ 4076 bool vector256 = false; 4077 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4078 %} 4079 ins_pipe( pipe_slow ); 4080 %} 4081 4082 instruct vmul8S(vecX dst, vecX src) %{ 4083 predicate(n->as_Vector()->length() == 8); 4084 match(Set dst (MulVS dst src)); 4085 format %{ "pmullw $dst,$src\t! mul packed8S" %} 4086 ins_encode %{ 4087 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 4088 %} 4089 ins_pipe( pipe_slow ); 4090 %} 4091 4092 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 4093 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4094 match(Set dst (MulVS src1 src2)); 4095 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 4096 ins_encode %{ 4097 bool vector256 = false; 4098 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4099 %} 4100 ins_pipe( pipe_slow ); 4101 %} 4102 4103 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 4104 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4105 match(Set dst (MulVS src (LoadVector mem))); 4106 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 4107 ins_encode %{ 4108 bool vector256 = false; 4109 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4110 %} 4111 ins_pipe( pipe_slow ); 4112 %} 4113 4114 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 4115 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4116 match(Set dst (MulVS src1 src2)); 4117 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 4118 ins_encode %{ 4119 bool vector256 = true; 4120 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4121 %} 4122 ins_pipe( pipe_slow ); 4123 %} 4124 4125 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 4126 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4127 match(Set dst (MulVS src (LoadVector mem))); 4128 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 4129 ins_encode %{ 4130 bool vector256 = true; 4131 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4132 %} 4133 ins_pipe( pipe_slow ); 4134 %} 4135 4136 // Integers vector mul (sse4_1) 4137 instruct vmul2I(vecD dst, vecD src) %{ 4138 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 4139 match(Set dst (MulVI dst src)); 4140 format %{ "pmulld $dst,$src\t! mul packed2I" %} 4141 ins_encode %{ 4142 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 4143 %} 4144 ins_pipe( pipe_slow ); 4145 %} 4146 4147 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 4148 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4149 match(Set dst (MulVI src1 src2)); 4150 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 4151 ins_encode %{ 4152 bool vector256 = false; 4153 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4154 %} 4155 ins_pipe( pipe_slow ); 4156 %} 4157 4158 instruct vmul4I(vecX dst, vecX src) %{ 4159 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 4160 match(Set dst (MulVI dst src)); 4161 format %{ "pmulld $dst,$src\t! mul packed4I" %} 4162 ins_encode %{ 4163 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 4164 %} 4165 ins_pipe( pipe_slow ); 4166 %} 4167 4168 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 4169 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4170 match(Set dst (MulVI src1 src2)); 4171 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 4172 ins_encode %{ 4173 bool vector256 = false; 4174 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4175 %} 4176 ins_pipe( pipe_slow ); 4177 %} 4178 4179 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 4180 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4181 match(Set dst (MulVI src (LoadVector mem))); 4182 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 4183 ins_encode %{ 4184 bool vector256 = false; 4185 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4186 %} 4187 ins_pipe( pipe_slow ); 4188 %} 4189 4190 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 4191 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4192 match(Set dst (MulVI src1 src2)); 4193 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 4194 ins_encode %{ 4195 bool vector256 = true; 4196 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4197 %} 4198 ins_pipe( pipe_slow ); 4199 %} 4200 4201 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 4202 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4203 match(Set dst (MulVI src (LoadVector mem))); 4204 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 4205 ins_encode %{ 4206 bool vector256 = true; 4207 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4208 %} 4209 ins_pipe( pipe_slow ); 4210 %} 4211 4212 // Floats vector mul 4213 instruct vmul2F(vecD dst, vecD src) %{ 4214 predicate(n->as_Vector()->length() == 2); 4215 match(Set dst (MulVF dst src)); 4216 format %{ "mulps $dst,$src\t! mul packed2F" %} 4217 ins_encode %{ 4218 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 4219 %} 4220 ins_pipe( pipe_slow ); 4221 %} 4222 4223 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 4224 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4225 match(Set dst (MulVF src1 src2)); 4226 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 4227 ins_encode %{ 4228 bool vector256 = false; 4229 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4230 %} 4231 ins_pipe( pipe_slow ); 4232 %} 4233 4234 instruct vmul4F(vecX dst, vecX src) %{ 4235 predicate(n->as_Vector()->length() == 4); 4236 match(Set dst (MulVF dst src)); 4237 format %{ "mulps $dst,$src\t! mul packed4F" %} 4238 ins_encode %{ 4239 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 4240 %} 4241 ins_pipe( pipe_slow ); 4242 %} 4243 4244 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 4245 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4246 match(Set dst (MulVF src1 src2)); 4247 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 4248 ins_encode %{ 4249 bool vector256 = false; 4250 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4251 %} 4252 ins_pipe( pipe_slow ); 4253 %} 4254 4255 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 4256 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4257 match(Set dst (MulVF src (LoadVector mem))); 4258 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 4259 ins_encode %{ 4260 bool vector256 = false; 4261 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4262 %} 4263 ins_pipe( pipe_slow ); 4264 %} 4265 4266 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 4267 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4268 match(Set dst (MulVF src1 src2)); 4269 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 4270 ins_encode %{ 4271 bool vector256 = true; 4272 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4273 %} 4274 ins_pipe( pipe_slow ); 4275 %} 4276 4277 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 4278 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4279 match(Set dst (MulVF src (LoadVector mem))); 4280 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 4281 ins_encode %{ 4282 bool vector256 = true; 4283 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4284 %} 4285 ins_pipe( pipe_slow ); 4286 %} 4287 4288 // Doubles vector mul 4289 instruct vmul2D(vecX dst, vecX src) %{ 4290 predicate(n->as_Vector()->length() == 2); 4291 match(Set dst (MulVD dst src)); 4292 format %{ "mulpd $dst,$src\t! mul packed2D" %} 4293 ins_encode %{ 4294 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 4295 %} 4296 ins_pipe( pipe_slow ); 4297 %} 4298 4299 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 4300 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4301 match(Set dst (MulVD src1 src2)); 4302 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 4303 ins_encode %{ 4304 bool vector256 = false; 4305 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4306 %} 4307 ins_pipe( pipe_slow ); 4308 %} 4309 4310 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 4311 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4312 match(Set dst (MulVD src (LoadVector mem))); 4313 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 4314 ins_encode %{ 4315 bool vector256 = false; 4316 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4317 %} 4318 ins_pipe( pipe_slow ); 4319 %} 4320 4321 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 4322 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4323 match(Set dst (MulVD src1 src2)); 4324 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 4325 ins_encode %{ 4326 bool vector256 = true; 4327 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4328 %} 4329 ins_pipe( pipe_slow ); 4330 %} 4331 4332 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 4333 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4334 match(Set dst (MulVD src (LoadVector mem))); 4335 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 4336 ins_encode %{ 4337 bool vector256 = true; 4338 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4339 %} 4340 ins_pipe( pipe_slow ); 4341 %} 4342 4343 // --------------------------------- DIV -------------------------------------- 4344 4345 // Floats vector div 4346 instruct vdiv2F(vecD dst, vecD src) %{ 4347 predicate(n->as_Vector()->length() == 2); 4348 match(Set dst (DivVF dst src)); 4349 format %{ "divps $dst,$src\t! div packed2F" %} 4350 ins_encode %{ 4351 __ divps($dst$$XMMRegister, $src$$XMMRegister); 4352 %} 4353 ins_pipe( pipe_slow ); 4354 %} 4355 4356 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 4357 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4358 match(Set dst (DivVF src1 src2)); 4359 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 4360 ins_encode %{ 4361 bool vector256 = false; 4362 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4363 %} 4364 ins_pipe( pipe_slow ); 4365 %} 4366 4367 instruct vdiv4F(vecX dst, vecX src) %{ 4368 predicate(n->as_Vector()->length() == 4); 4369 match(Set dst (DivVF dst src)); 4370 format %{ "divps $dst,$src\t! div packed4F" %} 4371 ins_encode %{ 4372 __ divps($dst$$XMMRegister, $src$$XMMRegister); 4373 %} 4374 ins_pipe( pipe_slow ); 4375 %} 4376 4377 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 4378 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4379 match(Set dst (DivVF src1 src2)); 4380 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 4381 ins_encode %{ 4382 bool vector256 = false; 4383 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4384 %} 4385 ins_pipe( pipe_slow ); 4386 %} 4387 4388 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 4389 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4390 match(Set dst (DivVF src (LoadVector mem))); 4391 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 4392 ins_encode %{ 4393 bool vector256 = false; 4394 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4395 %} 4396 ins_pipe( pipe_slow ); 4397 %} 4398 4399 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 4400 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4401 match(Set dst (DivVF src1 src2)); 4402 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 4403 ins_encode %{ 4404 bool vector256 = true; 4405 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4406 %} 4407 ins_pipe( pipe_slow ); 4408 %} 4409 4410 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 4411 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4412 match(Set dst (DivVF src (LoadVector mem))); 4413 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 4414 ins_encode %{ 4415 bool vector256 = true; 4416 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4417 %} 4418 ins_pipe( pipe_slow ); 4419 %} 4420 4421 // Doubles vector div 4422 instruct vdiv2D(vecX dst, vecX src) %{ 4423 predicate(n->as_Vector()->length() == 2); 4424 match(Set dst (DivVD dst src)); 4425 format %{ "divpd $dst,$src\t! div packed2D" %} 4426 ins_encode %{ 4427 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 4428 %} 4429 ins_pipe( pipe_slow ); 4430 %} 4431 4432 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 4433 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4434 match(Set dst (DivVD src1 src2)); 4435 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 4436 ins_encode %{ 4437 bool vector256 = false; 4438 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4439 %} 4440 ins_pipe( pipe_slow ); 4441 %} 4442 4443 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 4444 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4445 match(Set dst (DivVD src (LoadVector mem))); 4446 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 4447 ins_encode %{ 4448 bool vector256 = false; 4449 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4450 %} 4451 ins_pipe( pipe_slow ); 4452 %} 4453 4454 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 4455 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4456 match(Set dst (DivVD src1 src2)); 4457 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 4458 ins_encode %{ 4459 bool vector256 = true; 4460 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4461 %} 4462 ins_pipe( pipe_slow ); 4463 %} 4464 4465 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 4466 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4467 match(Set dst (DivVD src (LoadVector mem))); 4468 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 4469 ins_encode %{ 4470 bool vector256 = true; 4471 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4472 %} 4473 ins_pipe( pipe_slow ); 4474 %} 4475 4476 // ------------------------------ Shift --------------------------------------- 4477 4478 // Left and right shift count vectors are the same on x86 4479 // (only lowest bits of xmm reg are used for count). 4480 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 4481 match(Set dst (LShiftCntV cnt)); 4482 match(Set dst (RShiftCntV cnt)); 4483 format %{ "movd $dst,$cnt\t! load shift count" %} 4484 ins_encode %{ 4485 __ movdl($dst$$XMMRegister, $cnt$$Register); 4486 %} 4487 ins_pipe( pipe_slow ); 4488 %} 4489 4490 // ------------------------------ LeftShift ----------------------------------- 4491 4492 // Shorts/Chars vector left shift 4493 instruct vsll2S(vecS dst, vecS shift) %{ 4494 predicate(n->as_Vector()->length() == 2); 4495 match(Set dst (LShiftVS dst shift)); 4496 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 4497 ins_encode %{ 4498 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 4499 %} 4500 ins_pipe( pipe_slow ); 4501 %} 4502 4503 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 4504 predicate(n->as_Vector()->length() == 2); 4505 match(Set dst (LShiftVS dst shift)); 4506 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 4507 ins_encode %{ 4508 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 4509 %} 4510 ins_pipe( pipe_slow ); 4511 %} 4512 4513 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 4514 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4515 match(Set dst (LShiftVS src shift)); 4516 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 4517 ins_encode %{ 4518 bool vector256 = false; 4519 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4520 %} 4521 ins_pipe( pipe_slow ); 4522 %} 4523 4524 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4525 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4526 match(Set dst (LShiftVS src shift)); 4527 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 4528 ins_encode %{ 4529 bool vector256 = false; 4530 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4531 %} 4532 ins_pipe( pipe_slow ); 4533 %} 4534 4535 instruct vsll4S(vecD dst, vecS shift) %{ 4536 predicate(n->as_Vector()->length() == 4); 4537 match(Set dst (LShiftVS dst shift)); 4538 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 4539 ins_encode %{ 4540 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 4541 %} 4542 ins_pipe( pipe_slow ); 4543 %} 4544 4545 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 4546 predicate(n->as_Vector()->length() == 4); 4547 match(Set dst (LShiftVS dst shift)); 4548 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 4549 ins_encode %{ 4550 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 4551 %} 4552 ins_pipe( pipe_slow ); 4553 %} 4554 4555 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 4556 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4557 match(Set dst (LShiftVS src shift)); 4558 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 4559 ins_encode %{ 4560 bool vector256 = false; 4561 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4562 %} 4563 ins_pipe( pipe_slow ); 4564 %} 4565 4566 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4567 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4568 match(Set dst (LShiftVS src shift)); 4569 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 4570 ins_encode %{ 4571 bool vector256 = false; 4572 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4573 %} 4574 ins_pipe( pipe_slow ); 4575 %} 4576 4577 instruct vsll8S(vecX dst, vecS shift) %{ 4578 predicate(n->as_Vector()->length() == 8); 4579 match(Set dst (LShiftVS dst shift)); 4580 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 4581 ins_encode %{ 4582 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 4583 %} 4584 ins_pipe( pipe_slow ); 4585 %} 4586 4587 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 4588 predicate(n->as_Vector()->length() == 8); 4589 match(Set dst (LShiftVS dst shift)); 4590 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 4591 ins_encode %{ 4592 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 4593 %} 4594 ins_pipe( pipe_slow ); 4595 %} 4596 4597 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 4598 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4599 match(Set dst (LShiftVS src shift)); 4600 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 4601 ins_encode %{ 4602 bool vector256 = false; 4603 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4604 %} 4605 ins_pipe( pipe_slow ); 4606 %} 4607 4608 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4609 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4610 match(Set dst (LShiftVS src shift)); 4611 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 4612 ins_encode %{ 4613 bool vector256 = false; 4614 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4615 %} 4616 ins_pipe( pipe_slow ); 4617 %} 4618 4619 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 4620 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4621 match(Set dst (LShiftVS src shift)); 4622 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4623 ins_encode %{ 4624 bool vector256 = true; 4625 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4626 %} 4627 ins_pipe( pipe_slow ); 4628 %} 4629 4630 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4631 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4632 match(Set dst (LShiftVS src shift)); 4633 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4634 ins_encode %{ 4635 bool vector256 = true; 4636 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4637 %} 4638 ins_pipe( pipe_slow ); 4639 %} 4640 4641 // Integers vector left shift 4642 instruct vsll2I(vecD dst, vecS shift) %{ 4643 predicate(n->as_Vector()->length() == 2); 4644 match(Set dst (LShiftVI dst shift)); 4645 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4646 ins_encode %{ 4647 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4648 %} 4649 ins_pipe( pipe_slow ); 4650 %} 4651 4652 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 4653 predicate(n->as_Vector()->length() == 2); 4654 match(Set dst (LShiftVI dst shift)); 4655 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4656 ins_encode %{ 4657 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4658 %} 4659 ins_pipe( pipe_slow ); 4660 %} 4661 4662 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 4663 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4664 match(Set dst (LShiftVI src shift)); 4665 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 4666 ins_encode %{ 4667 bool vector256 = false; 4668 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4669 %} 4670 ins_pipe( pipe_slow ); 4671 %} 4672 4673 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4674 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4675 match(Set dst (LShiftVI src shift)); 4676 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 4677 ins_encode %{ 4678 bool vector256 = false; 4679 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4680 %} 4681 ins_pipe( pipe_slow ); 4682 %} 4683 4684 instruct vsll4I(vecX dst, vecS shift) %{ 4685 predicate(n->as_Vector()->length() == 4); 4686 match(Set dst (LShiftVI dst shift)); 4687 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4688 ins_encode %{ 4689 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4690 %} 4691 ins_pipe( pipe_slow ); 4692 %} 4693 4694 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 4695 predicate(n->as_Vector()->length() == 4); 4696 match(Set dst (LShiftVI dst shift)); 4697 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4698 ins_encode %{ 4699 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4700 %} 4701 ins_pipe( pipe_slow ); 4702 %} 4703 4704 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 4705 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4706 match(Set dst (LShiftVI src shift)); 4707 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 4708 ins_encode %{ 4709 bool vector256 = false; 4710 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4711 %} 4712 ins_pipe( pipe_slow ); 4713 %} 4714 4715 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4716 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4717 match(Set dst (LShiftVI src shift)); 4718 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 4719 ins_encode %{ 4720 bool vector256 = false; 4721 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4722 %} 4723 ins_pipe( pipe_slow ); 4724 %} 4725 4726 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 4727 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4728 match(Set dst (LShiftVI src shift)); 4729 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4730 ins_encode %{ 4731 bool vector256 = true; 4732 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4733 %} 4734 ins_pipe( pipe_slow ); 4735 %} 4736 4737 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4738 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4739 match(Set dst (LShiftVI src shift)); 4740 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4741 ins_encode %{ 4742 bool vector256 = true; 4743 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4744 %} 4745 ins_pipe( pipe_slow ); 4746 %} 4747 4748 // Longs vector left shift 4749 instruct vsll2L(vecX dst, vecS shift) %{ 4750 predicate(n->as_Vector()->length() == 2); 4751 match(Set dst (LShiftVL dst shift)); 4752 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4753 ins_encode %{ 4754 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 4755 %} 4756 ins_pipe( pipe_slow ); 4757 %} 4758 4759 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 4760 predicate(n->as_Vector()->length() == 2); 4761 match(Set dst (LShiftVL dst shift)); 4762 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4763 ins_encode %{ 4764 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 4765 %} 4766 ins_pipe( pipe_slow ); 4767 %} 4768 4769 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 4770 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4771 match(Set dst (LShiftVL src shift)); 4772 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 4773 ins_encode %{ 4774 bool vector256 = false; 4775 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4776 %} 4777 ins_pipe( pipe_slow ); 4778 %} 4779 4780 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4781 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4782 match(Set dst (LShiftVL src shift)); 4783 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 4784 ins_encode %{ 4785 bool vector256 = false; 4786 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4787 %} 4788 ins_pipe( pipe_slow ); 4789 %} 4790 4791 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 4792 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4793 match(Set dst (LShiftVL src shift)); 4794 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4795 ins_encode %{ 4796 bool vector256 = true; 4797 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4798 %} 4799 ins_pipe( pipe_slow ); 4800 %} 4801 4802 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4803 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4804 match(Set dst (LShiftVL src shift)); 4805 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4806 ins_encode %{ 4807 bool vector256 = true; 4808 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4809 %} 4810 ins_pipe( pipe_slow ); 4811 %} 4812 4813 // ----------------------- LogicalRightShift ----------------------------------- 4814 4815 // Shorts vector logical right shift produces incorrect Java result 4816 // for negative data because java code convert short value into int with 4817 // sign extension before a shift. But char vectors are fine since chars are 4818 // unsigned values. 4819 4820 instruct vsrl2S(vecS dst, vecS shift) %{ 4821 predicate(n->as_Vector()->length() == 2); 4822 match(Set dst (URShiftVS dst shift)); 4823 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 4824 ins_encode %{ 4825 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4826 %} 4827 ins_pipe( pipe_slow ); 4828 %} 4829 4830 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 4831 predicate(n->as_Vector()->length() == 2); 4832 match(Set dst (URShiftVS dst shift)); 4833 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 4834 ins_encode %{ 4835 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4836 %} 4837 ins_pipe( pipe_slow ); 4838 %} 4839 4840 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 4841 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4842 match(Set dst (URShiftVS src shift)); 4843 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4844 ins_encode %{ 4845 bool vector256 = false; 4846 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4847 %} 4848 ins_pipe( pipe_slow ); 4849 %} 4850 4851 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4852 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4853 match(Set dst (URShiftVS src shift)); 4854 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4855 ins_encode %{ 4856 bool vector256 = false; 4857 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4858 %} 4859 ins_pipe( pipe_slow ); 4860 %} 4861 4862 instruct vsrl4S(vecD dst, vecS shift) %{ 4863 predicate(n->as_Vector()->length() == 4); 4864 match(Set dst (URShiftVS dst shift)); 4865 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 4866 ins_encode %{ 4867 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4868 %} 4869 ins_pipe( pipe_slow ); 4870 %} 4871 4872 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 4873 predicate(n->as_Vector()->length() == 4); 4874 match(Set dst (URShiftVS dst shift)); 4875 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 4876 ins_encode %{ 4877 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4878 %} 4879 ins_pipe( pipe_slow ); 4880 %} 4881 4882 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 4883 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4884 match(Set dst (URShiftVS src shift)); 4885 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4886 ins_encode %{ 4887 bool vector256 = false; 4888 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4889 %} 4890 ins_pipe( pipe_slow ); 4891 %} 4892 4893 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4894 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4895 match(Set dst (URShiftVS src shift)); 4896 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4897 ins_encode %{ 4898 bool vector256 = false; 4899 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4900 %} 4901 ins_pipe( pipe_slow ); 4902 %} 4903 4904 instruct vsrl8S(vecX dst, vecS shift) %{ 4905 predicate(n->as_Vector()->length() == 8); 4906 match(Set dst (URShiftVS dst shift)); 4907 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4908 ins_encode %{ 4909 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4910 %} 4911 ins_pipe( pipe_slow ); 4912 %} 4913 4914 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 4915 predicate(n->as_Vector()->length() == 8); 4916 match(Set dst (URShiftVS dst shift)); 4917 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4918 ins_encode %{ 4919 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4920 %} 4921 ins_pipe( pipe_slow ); 4922 %} 4923 4924 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 4925 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4926 match(Set dst (URShiftVS src shift)); 4927 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 4928 ins_encode %{ 4929 bool vector256 = false; 4930 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4931 %} 4932 ins_pipe( pipe_slow ); 4933 %} 4934 4935 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4936 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4937 match(Set dst (URShiftVS src shift)); 4938 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 4939 ins_encode %{ 4940 bool vector256 = false; 4941 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4942 %} 4943 ins_pipe( pipe_slow ); 4944 %} 4945 4946 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 4947 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4948 match(Set dst (URShiftVS src shift)); 4949 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4950 ins_encode %{ 4951 bool vector256 = true; 4952 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4953 %} 4954 ins_pipe( pipe_slow ); 4955 %} 4956 4957 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4958 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4959 match(Set dst (URShiftVS src shift)); 4960 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4961 ins_encode %{ 4962 bool vector256 = true; 4963 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4964 %} 4965 ins_pipe( pipe_slow ); 4966 %} 4967 4968 // Integers vector logical right shift 4969 instruct vsrl2I(vecD dst, vecS shift) %{ 4970 predicate(n->as_Vector()->length() == 2); 4971 match(Set dst (URShiftVI dst shift)); 4972 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4973 ins_encode %{ 4974 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4975 %} 4976 ins_pipe( pipe_slow ); 4977 %} 4978 4979 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 4980 predicate(n->as_Vector()->length() == 2); 4981 match(Set dst (URShiftVI dst shift)); 4982 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4983 ins_encode %{ 4984 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4985 %} 4986 ins_pipe( pipe_slow ); 4987 %} 4988 4989 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 4990 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4991 match(Set dst (URShiftVI src shift)); 4992 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4993 ins_encode %{ 4994 bool vector256 = false; 4995 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4996 %} 4997 ins_pipe( pipe_slow ); 4998 %} 4999 5000 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 5001 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5002 match(Set dst (URShiftVI src shift)); 5003 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 5004 ins_encode %{ 5005 bool vector256 = false; 5006 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5007 %} 5008 ins_pipe( pipe_slow ); 5009 %} 5010 5011 instruct vsrl4I(vecX dst, vecS shift) %{ 5012 predicate(n->as_Vector()->length() == 4); 5013 match(Set dst (URShiftVI dst shift)); 5014 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 5015 ins_encode %{ 5016 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 5017 %} 5018 ins_pipe( pipe_slow ); 5019 %} 5020 5021 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 5022 predicate(n->as_Vector()->length() == 4); 5023 match(Set dst (URShiftVI dst shift)); 5024 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 5025 ins_encode %{ 5026 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 5027 %} 5028 ins_pipe( pipe_slow ); 5029 %} 5030 5031 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 5032 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5033 match(Set dst (URShiftVI src shift)); 5034 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 5035 ins_encode %{ 5036 bool vector256 = false; 5037 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5038 %} 5039 ins_pipe( pipe_slow ); 5040 %} 5041 5042 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 5043 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5044 match(Set dst (URShiftVI src shift)); 5045 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 5046 ins_encode %{ 5047 bool vector256 = false; 5048 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5049 %} 5050 ins_pipe( pipe_slow ); 5051 %} 5052 5053 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 5054 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5055 match(Set dst (URShiftVI src shift)); 5056 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 5057 ins_encode %{ 5058 bool vector256 = true; 5059 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5060 %} 5061 ins_pipe( pipe_slow ); 5062 %} 5063 5064 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 5065 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5066 match(Set dst (URShiftVI src shift)); 5067 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 5068 ins_encode %{ 5069 bool vector256 = true; 5070 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5071 %} 5072 ins_pipe( pipe_slow ); 5073 %} 5074 5075 // Longs vector logical right shift 5076 instruct vsrl2L(vecX dst, vecS shift) %{ 5077 predicate(n->as_Vector()->length() == 2); 5078 match(Set dst (URShiftVL dst shift)); 5079 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 5080 ins_encode %{ 5081 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 5082 %} 5083 ins_pipe( pipe_slow ); 5084 %} 5085 5086 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 5087 predicate(n->as_Vector()->length() == 2); 5088 match(Set dst (URShiftVL dst shift)); 5089 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 5090 ins_encode %{ 5091 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 5092 %} 5093 ins_pipe( pipe_slow ); 5094 %} 5095 5096 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 5097 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5098 match(Set dst (URShiftVL src shift)); 5099 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 5100 ins_encode %{ 5101 bool vector256 = false; 5102 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5103 %} 5104 ins_pipe( pipe_slow ); 5105 %} 5106 5107 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 5108 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5109 match(Set dst (URShiftVL src shift)); 5110 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 5111 ins_encode %{ 5112 bool vector256 = false; 5113 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5114 %} 5115 ins_pipe( pipe_slow ); 5116 %} 5117 5118 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 5119 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5120 match(Set dst (URShiftVL src shift)); 5121 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 5122 ins_encode %{ 5123 bool vector256 = true; 5124 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5125 %} 5126 ins_pipe( pipe_slow ); 5127 %} 5128 5129 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 5130 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5131 match(Set dst (URShiftVL src shift)); 5132 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 5133 ins_encode %{ 5134 bool vector256 = true; 5135 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5136 %} 5137 ins_pipe( pipe_slow ); 5138 %} 5139 5140 // ------------------- ArithmeticRightShift ----------------------------------- 5141 5142 // Shorts/Chars vector arithmetic right shift 5143 instruct vsra2S(vecS dst, vecS shift) %{ 5144 predicate(n->as_Vector()->length() == 2); 5145 match(Set dst (RShiftVS dst shift)); 5146 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 5147 ins_encode %{ 5148 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 5149 %} 5150 ins_pipe( pipe_slow ); 5151 %} 5152 5153 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 5154 predicate(n->as_Vector()->length() == 2); 5155 match(Set dst (RShiftVS dst shift)); 5156 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 5157 ins_encode %{ 5158 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 5159 %} 5160 ins_pipe( pipe_slow ); 5161 %} 5162 5163 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 5164 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5165 match(Set dst (RShiftVS src shift)); 5166 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 5167 ins_encode %{ 5168 bool vector256 = false; 5169 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5170 %} 5171 ins_pipe( pipe_slow ); 5172 %} 5173 5174 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 5175 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5176 match(Set dst (RShiftVS src shift)); 5177 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 5178 ins_encode %{ 5179 bool vector256 = false; 5180 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5181 %} 5182 ins_pipe( pipe_slow ); 5183 %} 5184 5185 instruct vsra4S(vecD dst, vecS shift) %{ 5186 predicate(n->as_Vector()->length() == 4); 5187 match(Set dst (RShiftVS dst shift)); 5188 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 5189 ins_encode %{ 5190 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 5191 %} 5192 ins_pipe( pipe_slow ); 5193 %} 5194 5195 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 5196 predicate(n->as_Vector()->length() == 4); 5197 match(Set dst (RShiftVS dst shift)); 5198 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 5199 ins_encode %{ 5200 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 5201 %} 5202 ins_pipe( pipe_slow ); 5203 %} 5204 5205 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 5206 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5207 match(Set dst (RShiftVS src shift)); 5208 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 5209 ins_encode %{ 5210 bool vector256 = false; 5211 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5212 %} 5213 ins_pipe( pipe_slow ); 5214 %} 5215 5216 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 5217 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5218 match(Set dst (RShiftVS src shift)); 5219 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 5220 ins_encode %{ 5221 bool vector256 = false; 5222 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5223 %} 5224 ins_pipe( pipe_slow ); 5225 %} 5226 5227 instruct vsra8S(vecX dst, vecS shift) %{ 5228 predicate(n->as_Vector()->length() == 8); 5229 match(Set dst (RShiftVS dst shift)); 5230 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 5231 ins_encode %{ 5232 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 5233 %} 5234 ins_pipe( pipe_slow ); 5235 %} 5236 5237 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 5238 predicate(n->as_Vector()->length() == 8); 5239 match(Set dst (RShiftVS dst shift)); 5240 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 5241 ins_encode %{ 5242 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 5243 %} 5244 ins_pipe( pipe_slow ); 5245 %} 5246 5247 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 5248 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5249 match(Set dst (RShiftVS src shift)); 5250 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 5251 ins_encode %{ 5252 bool vector256 = false; 5253 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5254 %} 5255 ins_pipe( pipe_slow ); 5256 %} 5257 5258 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 5259 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5260 match(Set dst (RShiftVS src shift)); 5261 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 5262 ins_encode %{ 5263 bool vector256 = false; 5264 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5265 %} 5266 ins_pipe( pipe_slow ); 5267 %} 5268 5269 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 5270 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5271 match(Set dst (RShiftVS src shift)); 5272 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 5273 ins_encode %{ 5274 bool vector256 = true; 5275 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5276 %} 5277 ins_pipe( pipe_slow ); 5278 %} 5279 5280 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 5281 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5282 match(Set dst (RShiftVS src shift)); 5283 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 5284 ins_encode %{ 5285 bool vector256 = true; 5286 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5287 %} 5288 ins_pipe( pipe_slow ); 5289 %} 5290 5291 // Integers vector arithmetic right shift 5292 instruct vsra2I(vecD dst, vecS shift) %{ 5293 predicate(n->as_Vector()->length() == 2); 5294 match(Set dst (RShiftVI dst shift)); 5295 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 5296 ins_encode %{ 5297 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 5298 %} 5299 ins_pipe( pipe_slow ); 5300 %} 5301 5302 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 5303 predicate(n->as_Vector()->length() == 2); 5304 match(Set dst (RShiftVI dst shift)); 5305 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 5306 ins_encode %{ 5307 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 5308 %} 5309 ins_pipe( pipe_slow ); 5310 %} 5311 5312 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 5313 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5314 match(Set dst (RShiftVI src shift)); 5315 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 5316 ins_encode %{ 5317 bool vector256 = false; 5318 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5319 %} 5320 ins_pipe( pipe_slow ); 5321 %} 5322 5323 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 5324 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5325 match(Set dst (RShiftVI src shift)); 5326 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 5327 ins_encode %{ 5328 bool vector256 = false; 5329 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5330 %} 5331 ins_pipe( pipe_slow ); 5332 %} 5333 5334 instruct vsra4I(vecX dst, vecS shift) %{ 5335 predicate(n->as_Vector()->length() == 4); 5336 match(Set dst (RShiftVI dst shift)); 5337 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 5338 ins_encode %{ 5339 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 5340 %} 5341 ins_pipe( pipe_slow ); 5342 %} 5343 5344 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 5345 predicate(n->as_Vector()->length() == 4); 5346 match(Set dst (RShiftVI dst shift)); 5347 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 5348 ins_encode %{ 5349 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 5350 %} 5351 ins_pipe( pipe_slow ); 5352 %} 5353 5354 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 5355 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5356 match(Set dst (RShiftVI src shift)); 5357 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 5358 ins_encode %{ 5359 bool vector256 = false; 5360 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5361 %} 5362 ins_pipe( pipe_slow ); 5363 %} 5364 5365 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 5366 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5367 match(Set dst (RShiftVI src shift)); 5368 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 5369 ins_encode %{ 5370 bool vector256 = false; 5371 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5372 %} 5373 ins_pipe( pipe_slow ); 5374 %} 5375 5376 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 5377 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5378 match(Set dst (RShiftVI src shift)); 5379 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 5380 ins_encode %{ 5381 bool vector256 = true; 5382 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 5383 %} 5384 ins_pipe( pipe_slow ); 5385 %} 5386 5387 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 5388 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5389 match(Set dst (RShiftVI src shift)); 5390 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 5391 ins_encode %{ 5392 bool vector256 = true; 5393 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 5394 %} 5395 ins_pipe( pipe_slow ); 5396 %} 5397 5398 // There are no longs vector arithmetic right shift instructions. 5399 5400 5401 // --------------------------------- AND -------------------------------------- 5402 5403 instruct vand4B(vecS dst, vecS src) %{ 5404 predicate(n->as_Vector()->length_in_bytes() == 4); 5405 match(Set dst (AndV dst src)); 5406 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 5407 ins_encode %{ 5408 __ pand($dst$$XMMRegister, $src$$XMMRegister); 5409 %} 5410 ins_pipe( pipe_slow ); 5411 %} 5412 5413 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 5414 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 5415 match(Set dst (AndV src1 src2)); 5416 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 5417 ins_encode %{ 5418 bool vector256 = false; 5419 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5420 %} 5421 ins_pipe( pipe_slow ); 5422 %} 5423 5424 instruct vand8B(vecD dst, vecD src) %{ 5425 predicate(n->as_Vector()->length_in_bytes() == 8); 5426 match(Set dst (AndV dst src)); 5427 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 5428 ins_encode %{ 5429 __ pand($dst$$XMMRegister, $src$$XMMRegister); 5430 %} 5431 ins_pipe( pipe_slow ); 5432 %} 5433 5434 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 5435 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 5436 match(Set dst (AndV src1 src2)); 5437 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 5438 ins_encode %{ 5439 bool vector256 = false; 5440 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5441 %} 5442 ins_pipe( pipe_slow ); 5443 %} 5444 5445 instruct vand16B(vecX dst, vecX src) %{ 5446 predicate(n->as_Vector()->length_in_bytes() == 16); 5447 match(Set dst (AndV dst src)); 5448 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 5449 ins_encode %{ 5450 __ pand($dst$$XMMRegister, $src$$XMMRegister); 5451 %} 5452 ins_pipe( pipe_slow ); 5453 %} 5454 5455 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 5456 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5457 match(Set dst (AndV src1 src2)); 5458 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 5459 ins_encode %{ 5460 bool vector256 = false; 5461 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5462 %} 5463 ins_pipe( pipe_slow ); 5464 %} 5465 5466 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 5467 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5468 match(Set dst (AndV src (LoadVector mem))); 5469 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 5470 ins_encode %{ 5471 bool vector256 = false; 5472 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5473 %} 5474 ins_pipe( pipe_slow ); 5475 %} 5476 5477 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 5478 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5479 match(Set dst (AndV src1 src2)); 5480 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 5481 ins_encode %{ 5482 bool vector256 = true; 5483 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5484 %} 5485 ins_pipe( pipe_slow ); 5486 %} 5487 5488 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 5489 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5490 match(Set dst (AndV src (LoadVector mem))); 5491 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 5492 ins_encode %{ 5493 bool vector256 = true; 5494 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5495 %} 5496 ins_pipe( pipe_slow ); 5497 %} 5498 5499 // --------------------------------- OR --------------------------------------- 5500 5501 instruct vor4B(vecS dst, vecS src) %{ 5502 predicate(n->as_Vector()->length_in_bytes() == 4); 5503 match(Set dst (OrV dst src)); 5504 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 5505 ins_encode %{ 5506 __ por($dst$$XMMRegister, $src$$XMMRegister); 5507 %} 5508 ins_pipe( pipe_slow ); 5509 %} 5510 5511 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 5512 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 5513 match(Set dst (OrV src1 src2)); 5514 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 5515 ins_encode %{ 5516 bool vector256 = false; 5517 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5518 %} 5519 ins_pipe( pipe_slow ); 5520 %} 5521 5522 instruct vor8B(vecD dst, vecD src) %{ 5523 predicate(n->as_Vector()->length_in_bytes() == 8); 5524 match(Set dst (OrV dst src)); 5525 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 5526 ins_encode %{ 5527 __ por($dst$$XMMRegister, $src$$XMMRegister); 5528 %} 5529 ins_pipe( pipe_slow ); 5530 %} 5531 5532 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 5533 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 5534 match(Set dst (OrV src1 src2)); 5535 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} 5536 ins_encode %{ 5537 bool vector256 = false; 5538 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5539 %} 5540 ins_pipe( pipe_slow ); 5541 %} 5542 5543 instruct vor16B(vecX dst, vecX src) %{ 5544 predicate(n->as_Vector()->length_in_bytes() == 16); 5545 match(Set dst (OrV dst src)); 5546 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 5547 ins_encode %{ 5548 __ por($dst$$XMMRegister, $src$$XMMRegister); 5549 %} 5550 ins_pipe( pipe_slow ); 5551 %} 5552 5553 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 5554 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5555 match(Set dst (OrV src1 src2)); 5556 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 5557 ins_encode %{ 5558 bool vector256 = false; 5559 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5560 %} 5561 ins_pipe( pipe_slow ); 5562 %} 5563 5564 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 5565 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5566 match(Set dst (OrV src (LoadVector mem))); 5567 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 5568 ins_encode %{ 5569 bool vector256 = false; 5570 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5571 %} 5572 ins_pipe( pipe_slow ); 5573 %} 5574 5575 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 5576 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5577 match(Set dst (OrV src1 src2)); 5578 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 5579 ins_encode %{ 5580 bool vector256 = true; 5581 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5582 %} 5583 ins_pipe( pipe_slow ); 5584 %} 5585 5586 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 5587 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5588 match(Set dst (OrV src (LoadVector mem))); 5589 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 5590 ins_encode %{ 5591 bool vector256 = true; 5592 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5593 %} 5594 ins_pipe( pipe_slow ); 5595 %} 5596 5597 // --------------------------------- XOR -------------------------------------- 5598 5599 instruct vxor4B(vecS dst, vecS src) %{ 5600 predicate(n->as_Vector()->length_in_bytes() == 4); 5601 match(Set dst (XorV dst src)); 5602 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 5603 ins_encode %{ 5604 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5605 %} 5606 ins_pipe( pipe_slow ); 5607 %} 5608 5609 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 5610 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 5611 match(Set dst (XorV src1 src2)); 5612 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 5613 ins_encode %{ 5614 bool vector256 = false; 5615 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5616 %} 5617 ins_pipe( pipe_slow ); 5618 %} 5619 5620 instruct vxor8B(vecD dst, vecD src) %{ 5621 predicate(n->as_Vector()->length_in_bytes() == 8); 5622 match(Set dst (XorV dst src)); 5623 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 5624 ins_encode %{ 5625 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5626 %} 5627 ins_pipe( pipe_slow ); 5628 %} 5629 5630 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 5631 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 5632 match(Set dst (XorV src1 src2)); 5633 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 5634 ins_encode %{ 5635 bool vector256 = false; 5636 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5637 %} 5638 ins_pipe( pipe_slow ); 5639 %} 5640 5641 instruct vxor16B(vecX dst, vecX src) %{ 5642 predicate(n->as_Vector()->length_in_bytes() == 16); 5643 match(Set dst (XorV dst src)); 5644 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 5645 ins_encode %{ 5646 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5647 %} 5648 ins_pipe( pipe_slow ); 5649 %} 5650 5651 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 5652 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5653 match(Set dst (XorV src1 src2)); 5654 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 5655 ins_encode %{ 5656 bool vector256 = false; 5657 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5658 %} 5659 ins_pipe( pipe_slow ); 5660 %} 5661 5662 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 5663 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5664 match(Set dst (XorV src (LoadVector mem))); 5665 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 5666 ins_encode %{ 5667 bool vector256 = false; 5668 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5669 %} 5670 ins_pipe( pipe_slow ); 5671 %} 5672 5673 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 5674 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5675 match(Set dst (XorV src1 src2)); 5676 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 5677 ins_encode %{ 5678 bool vector256 = true; 5679 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5680 %} 5681 ins_pipe( pipe_slow ); 5682 %} 5683 5684 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 5685 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5686 match(Set dst (XorV src (LoadVector mem))); 5687 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 5688 ins_encode %{ 5689 bool vector256 = true; 5690 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5691 %} 5692 ins_pipe( pipe_slow ); 5693 %} 5694