//
// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI: No register preserved across function calls
// XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
// XMM0-XMM3 might hold parameters

// XMM0-XMM5 are caller-saved (SOC) on every supported ABI.
// Each XMMn is described as eight 32-bit word slots (XMMn, XMMnb..XMMnh)
// so the allocator can track float (1 slot), double (2 slots) and
// vector (4 or 8 slots) values in the same physical register.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _WIN64

// Windows x64 ABI: XMM6-XMM15 are callee-saved (SOE).
reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#else // _WIN64

// Non-Windows ABIs: all XMM registers are caller-saved (SOC).
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _LP64

// XMM8-XMM15 exist only in 64-bit mode (REX/VEX encodable).
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#endif // _LP64

#endif // _WIN64

// Condition-code register; encoding differs between 32- and 64-bit VMs
// because 64-bit mode has twice as many general registers before it.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation order for the XMM word slots.
alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
reg_class double_reg(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}

source %{
// Float masks come from different places depending on platform.
479 #ifdef _LP64 480 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 481 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 482 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 483 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 484 #else 485 static address float_signmask() { return (address)float_signmask_pool; } 486 static address float_signflip() { return (address)float_signflip_pool; } 487 static address double_signmask() { return (address)double_signmask_pool; } 488 static address double_signflip() { return (address)double_signflip_pool; } 489 #endif 490 491 // Map Types to machine register types 492 const int Matcher::base2reg[Type::lastype] = { 493 Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, 494 Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ 495 Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */ 496 Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ 497 0, 0/*abio*/, 498 Op_RegP /* Return address */, 0, /* the memories */ 499 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, 500 0 /*bottom*/ 501 }; 502 503 // Max vector size in bytes. 0 if not supported. 504 const int Matcher::vector_width_in_bytes(BasicType bt) { 505 assert(is_java_primitive(bt), "only primitive type vectors"); 506 if (UseSSE < 2) return 0; 507 // SSE2 supports 128bit vectors for all types. 508 // AVX2 supports 256bit vectors for all types. 509 int size = (UseAVX > 1) ? 32 : 16; 510 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 511 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 512 size = 32; 513 // Use flag to limit vector size. 514 size = MIN2(size,(int)MaxVectorSize); 515 // Minimum 2 values in vector (or 4 for bytes). 
516 switch (bt) { 517 case T_DOUBLE: 518 case T_LONG: 519 if (size < 16) return 0; 520 case T_FLOAT: 521 case T_INT: 522 if (size < 8) return 0; 523 case T_BOOLEAN: 524 case T_BYTE: 525 case T_CHAR: 526 case T_SHORT: 527 if (size < 4) return 0; 528 break; 529 default: 530 ShouldNotReachHere(); 531 } 532 return size; 533 } 534 535 // Limits on vector size (number of elements) loaded into vector. 536 const int Matcher::max_vector_size(const BasicType bt) { 537 return vector_width_in_bytes(bt)/type2aelembytes(bt); 538 } 539 const int Matcher::min_vector_size(const BasicType bt) { 540 int max_size = max_vector_size(bt); 541 // Min size which can be loaded into vector is 4 bytes. 542 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 543 return MIN2(size,max_size); 544 } 545 546 // Vector ideal reg corresponding to specidied size in bytes 547 const int Matcher::vector_ideal_reg(int size) { 548 assert(MaxVectorSize >= size, ""); 549 switch(size) { 550 case 4: return Op_VecS; 551 case 8: return Op_VecD; 552 case 16: return Op_VecX; 553 case 32: return Op_VecY; 554 } 555 ShouldNotReachHere(); 556 return 0; 557 } 558 559 // x86 supports misaligned vectors store/load. 560 const bool Matcher::misaligned_vectors_ok() { 561 return !AlignVector; // can be changed by flag 562 } 563 564 // Helper methods for MachSpillCopyNode::implementation(). 565 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 566 int src_hi, int dst_hi, uint ireg, outputStream* st) { 567 // In 64-bit VM size calculation is very complex. Emitting instructions 568 // into scratch buffer is used to get size in 64-bit VM. 
569 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 570 assert(ireg == Op_VecS || // 32bit vector 571 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 572 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 573 "no non-adjacent vector moves" ); 574 if (cbuf) { 575 MacroAssembler _masm(cbuf); 576 int offset = __ offset(); 577 switch (ireg) { 578 case Op_VecS: // copy whole register 579 case Op_VecD: 580 case Op_VecX: 581 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 582 break; 583 case Op_VecY: 584 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 585 break; 586 default: 587 ShouldNotReachHere(); 588 } 589 int size = __ offset() - offset; 590 #ifdef ASSERT 591 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 592 assert(!do_size || size == 4, "incorrect size calculattion"); 593 #endif 594 return size; 595 #ifndef PRODUCT 596 } else if (!do_size) { 597 switch (ireg) { 598 case Op_VecS: 599 case Op_VecD: 600 case Op_VecX: 601 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 602 break; 603 case Op_VecY: 604 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 605 break; 606 default: 607 ShouldNotReachHere(); 608 } 609 #endif 610 } 611 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 612 return 4; 613 } 614 615 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 616 int stack_offset, int reg, uint ireg, outputStream* st) { 617 // In 64-bit VM size calculation is very complex. Emitting instructions 618 // into scratch buffer is used to get size in 64-bit VM. 
619 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 620 if (cbuf) { 621 MacroAssembler _masm(cbuf); 622 int offset = __ offset(); 623 if (is_load) { 624 switch (ireg) { 625 case Op_VecS: 626 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 627 break; 628 case Op_VecD: 629 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 630 break; 631 case Op_VecX: 632 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 633 break; 634 case Op_VecY: 635 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 636 break; 637 default: 638 ShouldNotReachHere(); 639 } 640 } else { // store 641 switch (ireg) { 642 case Op_VecS: 643 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 644 break; 645 case Op_VecD: 646 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 647 break; 648 case Op_VecX: 649 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 650 break; 651 case Op_VecY: 652 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 653 break; 654 default: 655 ShouldNotReachHere(); 656 } 657 } 658 int size = __ offset() - offset; 659 #ifdef ASSERT 660 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 661 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
662 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 663 #endif 664 return size; 665 #ifndef PRODUCT 666 } else if (!do_size) { 667 if (is_load) { 668 switch (ireg) { 669 case Op_VecS: 670 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 671 break; 672 case Op_VecD: 673 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 674 break; 675 case Op_VecX: 676 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 677 break; 678 case Op_VecY: 679 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 680 break; 681 default: 682 ShouldNotReachHere(); 683 } 684 } else { // store 685 switch (ireg) { 686 case Op_VecS: 687 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 688 break; 689 case Op_VecD: 690 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 691 break; 692 case Op_VecX: 693 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 694 break; 695 case Op_VecY: 696 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 697 break; 698 default: 699 ShouldNotReachHere(); 700 } 701 } 702 #endif 703 } 704 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 705 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 706 return 5+offset_size; 707 } 708 709 static inline jfloat replicate4_imm(int con, int width) { 710 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  // Double the replicated run until all 32 bits are filled.
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

// Replicate a 1-, 2- or 4-byte immediate across 64 bits; the result is
// returned as a jdouble bit pattern for emission into the constant table.
static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  // Double the replicated run until all 64 bits are filled.
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}

#ifndef PRODUCT
// Print a nop pad with its byte count (debug listing only).
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

// Emit _count bytes of nop padding.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

// A nop pad occupies exactly _count bytes.
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

// A breakpoint is a single int3 trap instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  // Save RSP into the MethodHandle RBP save slot before a call that may
  // change the stack pointer (continues below).
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore RSP from the MethodHandle RBP save slot after the call.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // Optional post-call check that the stack depth is unchanged.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie (0xbadb100d) on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors
// 32-bit vector operand, allocated from the vectors_reg class.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 64-bit vector operand.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 128-bit vector operand.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 256-bit vector operand.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// Halt is implemented as an int3 trap.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// Scalar float add, SSE two-operand form (dst += src); AVX disabled.
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float add with a memory operand.
instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float add with an immediate routed through the constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float add, AVX three-operand form (continues below).
instruct vaddF_reg(regF
dst, regF src1, regF src2) %{ 893 predicate(UseAVX > 0); 894 match(Set dst (AddF src1 src2)); 895 896 format %{ "vaddss $dst, $src1, $src2" %} 897 ins_cost(150); 898 ins_encode %{ 899 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 900 %} 901 ins_pipe(pipe_slow); 902 %} 903 904 instruct vaddF_mem(regF dst, regF src1, memory src2) %{ 905 predicate(UseAVX > 0); 906 match(Set dst (AddF src1 (LoadF src2))); 907 908 format %{ "vaddss $dst, $src1, $src2" %} 909 ins_cost(150); 910 ins_encode %{ 911 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 912 %} 913 ins_pipe(pipe_slow); 914 %} 915 916 instruct vaddF_imm(regF dst, regF src, immF con) %{ 917 predicate(UseAVX > 0); 918 match(Set dst (AddF src con)); 919 920 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 921 ins_cost(150); 922 ins_encode %{ 923 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 924 %} 925 ins_pipe(pipe_slow); 926 %} 927 928 instruct addD_reg(regD dst, regD src) %{ 929 predicate((UseSSE>=2) && (UseAVX == 0)); 930 match(Set dst (AddD dst src)); 931 932 format %{ "addsd $dst, $src" %} 933 ins_cost(150); 934 ins_encode %{ 935 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 936 %} 937 ins_pipe(pipe_slow); 938 %} 939 940 instruct addD_mem(regD dst, memory src) %{ 941 predicate((UseSSE>=2) && (UseAVX == 0)); 942 match(Set dst (AddD dst (LoadD src))); 943 944 format %{ "addsd $dst, $src" %} 945 ins_cost(150); 946 ins_encode %{ 947 __ addsd($dst$$XMMRegister, $src$$Address); 948 %} 949 ins_pipe(pipe_slow); 950 %} 951 952 instruct addD_imm(regD dst, immD con) %{ 953 predicate((UseSSE>=2) && (UseAVX == 0)); 954 match(Set dst (AddD dst con)); 955 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 956 ins_cost(150); 957 ins_encode %{ 958 __ addsd($dst$$XMMRegister, $constantaddress($con)); 959 %} 960 ins_pipe(pipe_slow); 961 %} 962 963 instruct vaddD_reg(regD dst, regD 
src1, regD src2) %{ 964 predicate(UseAVX > 0); 965 match(Set dst (AddD src1 src2)); 966 967 format %{ "vaddsd $dst, $src1, $src2" %} 968 ins_cost(150); 969 ins_encode %{ 970 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 971 %} 972 ins_pipe(pipe_slow); 973 %} 974 975 instruct vaddD_mem(regD dst, regD src1, memory src2) %{ 976 predicate(UseAVX > 0); 977 match(Set dst (AddD src1 (LoadD src2))); 978 979 format %{ "vaddsd $dst, $src1, $src2" %} 980 ins_cost(150); 981 ins_encode %{ 982 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 983 %} 984 ins_pipe(pipe_slow); 985 %} 986 987 instruct vaddD_imm(regD dst, regD src, immD con) %{ 988 predicate(UseAVX > 0); 989 match(Set dst (AddD src con)); 990 991 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 992 ins_cost(150); 993 ins_encode %{ 994 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 995 %} 996 ins_pipe(pipe_slow); 997 %} 998 999 instruct subF_reg(regF dst, regF src) %{ 1000 predicate((UseSSE>=1) && (UseAVX == 0)); 1001 match(Set dst (SubF dst src)); 1002 1003 format %{ "subss $dst, $src" %} 1004 ins_cost(150); 1005 ins_encode %{ 1006 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1007 %} 1008 ins_pipe(pipe_slow); 1009 %} 1010 1011 instruct subF_mem(regF dst, memory src) %{ 1012 predicate((UseSSE>=1) && (UseAVX == 0)); 1013 match(Set dst (SubF dst (LoadF src))); 1014 1015 format %{ "subss $dst, $src" %} 1016 ins_cost(150); 1017 ins_encode %{ 1018 __ subss($dst$$XMMRegister, $src$$Address); 1019 %} 1020 ins_pipe(pipe_slow); 1021 %} 1022 1023 instruct subF_imm(regF dst, immF con) %{ 1024 predicate((UseSSE>=1) && (UseAVX == 0)); 1025 match(Set dst (SubF dst con)); 1026 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1027 ins_cost(150); 1028 ins_encode %{ 1029 __ subss($dst$$XMMRegister, $constantaddress($con)); 1030 %} 1031 ins_pipe(pipe_slow); 1032 %} 1033 1034 instruct 
vsubF_reg(regF dst, regF src1, regF src2) %{ 1035 predicate(UseAVX > 0); 1036 match(Set dst (SubF src1 src2)); 1037 1038 format %{ "vsubss $dst, $src1, $src2" %} 1039 ins_cost(150); 1040 ins_encode %{ 1041 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1042 %} 1043 ins_pipe(pipe_slow); 1044 %} 1045 1046 instruct vsubF_mem(regF dst, regF src1, memory src2) %{ 1047 predicate(UseAVX > 0); 1048 match(Set dst (SubF src1 (LoadF src2))); 1049 1050 format %{ "vsubss $dst, $src1, $src2" %} 1051 ins_cost(150); 1052 ins_encode %{ 1053 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1054 %} 1055 ins_pipe(pipe_slow); 1056 %} 1057 1058 instruct vsubF_imm(regF dst, regF src, immF con) %{ 1059 predicate(UseAVX > 0); 1060 match(Set dst (SubF src con)); 1061 1062 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1063 ins_cost(150); 1064 ins_encode %{ 1065 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1066 %} 1067 ins_pipe(pipe_slow); 1068 %} 1069 1070 instruct subD_reg(regD dst, regD src) %{ 1071 predicate((UseSSE>=2) && (UseAVX == 0)); 1072 match(Set dst (SubD dst src)); 1073 1074 format %{ "subsd $dst, $src" %} 1075 ins_cost(150); 1076 ins_encode %{ 1077 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 1078 %} 1079 ins_pipe(pipe_slow); 1080 %} 1081 1082 instruct subD_mem(regD dst, memory src) %{ 1083 predicate((UseSSE>=2) && (UseAVX == 0)); 1084 match(Set dst (SubD dst (LoadD src))); 1085 1086 format %{ "subsd $dst, $src" %} 1087 ins_cost(150); 1088 ins_encode %{ 1089 __ subsd($dst$$XMMRegister, $src$$Address); 1090 %} 1091 ins_pipe(pipe_slow); 1092 %} 1093 1094 instruct subD_imm(regD dst, immD con) %{ 1095 predicate((UseSSE>=2) && (UseAVX == 0)); 1096 match(Set dst (SubD dst con)); 1097 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1098 ins_cost(150); 1099 ins_encode %{ 1100 __ subsd($dst$$XMMRegister, $constantaddress($con)); 1101 
%} 1102 ins_pipe(pipe_slow); 1103 %} 1104 1105 instruct vsubD_reg(regD dst, regD src1, regD src2) %{ 1106 predicate(UseAVX > 0); 1107 match(Set dst (SubD src1 src2)); 1108 1109 format %{ "vsubsd $dst, $src1, $src2" %} 1110 ins_cost(150); 1111 ins_encode %{ 1112 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1113 %} 1114 ins_pipe(pipe_slow); 1115 %} 1116 1117 instruct vsubD_mem(regD dst, regD src1, memory src2) %{ 1118 predicate(UseAVX > 0); 1119 match(Set dst (SubD src1 (LoadD src2))); 1120 1121 format %{ "vsubsd $dst, $src1, $src2" %} 1122 ins_cost(150); 1123 ins_encode %{ 1124 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1125 %} 1126 ins_pipe(pipe_slow); 1127 %} 1128 1129 instruct vsubD_imm(regD dst, regD src, immD con) %{ 1130 predicate(UseAVX > 0); 1131 match(Set dst (SubD src con)); 1132 1133 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1134 ins_cost(150); 1135 ins_encode %{ 1136 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1137 %} 1138 ins_pipe(pipe_slow); 1139 %} 1140 1141 instruct mulF_reg(regF dst, regF src) %{ 1142 predicate((UseSSE>=1) && (UseAVX == 0)); 1143 match(Set dst (MulF dst src)); 1144 1145 format %{ "mulss $dst, $src" %} 1146 ins_cost(150); 1147 ins_encode %{ 1148 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 1149 %} 1150 ins_pipe(pipe_slow); 1151 %} 1152 1153 instruct mulF_mem(regF dst, memory src) %{ 1154 predicate((UseSSE>=1) && (UseAVX == 0)); 1155 match(Set dst (MulF dst (LoadF src))); 1156 1157 format %{ "mulss $dst, $src" %} 1158 ins_cost(150); 1159 ins_encode %{ 1160 __ mulss($dst$$XMMRegister, $src$$Address); 1161 %} 1162 ins_pipe(pipe_slow); 1163 %} 1164 1165 instruct mulF_imm(regF dst, immF con) %{ 1166 predicate((UseSSE>=1) && (UseAVX == 0)); 1167 match(Set dst (MulF dst con)); 1168 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1169 ins_cost(150); 1170 ins_encode %{ 1171 
__ mulss($dst$$XMMRegister, $constantaddress($con)); 1172 %} 1173 ins_pipe(pipe_slow); 1174 %} 1175 1176 instruct vmulF_reg(regF dst, regF src1, regF src2) %{ 1177 predicate(UseAVX > 0); 1178 match(Set dst (MulF src1 src2)); 1179 1180 format %{ "vmulss $dst, $src1, $src2" %} 1181 ins_cost(150); 1182 ins_encode %{ 1183 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1184 %} 1185 ins_pipe(pipe_slow); 1186 %} 1187 1188 instruct vmulF_mem(regF dst, regF src1, memory src2) %{ 1189 predicate(UseAVX > 0); 1190 match(Set dst (MulF src1 (LoadF src2))); 1191 1192 format %{ "vmulss $dst, $src1, $src2" %} 1193 ins_cost(150); 1194 ins_encode %{ 1195 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1196 %} 1197 ins_pipe(pipe_slow); 1198 %} 1199 1200 instruct vmulF_imm(regF dst, regF src, immF con) %{ 1201 predicate(UseAVX > 0); 1202 match(Set dst (MulF src con)); 1203 1204 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1205 ins_cost(150); 1206 ins_encode %{ 1207 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1208 %} 1209 ins_pipe(pipe_slow); 1210 %} 1211 1212 instruct mulD_reg(regD dst, regD src) %{ 1213 predicate((UseSSE>=2) && (UseAVX == 0)); 1214 match(Set dst (MulD dst src)); 1215 1216 format %{ "mulsd $dst, $src" %} 1217 ins_cost(150); 1218 ins_encode %{ 1219 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 1220 %} 1221 ins_pipe(pipe_slow); 1222 %} 1223 1224 instruct mulD_mem(regD dst, memory src) %{ 1225 predicate((UseSSE>=2) && (UseAVX == 0)); 1226 match(Set dst (MulD dst (LoadD src))); 1227 1228 format %{ "mulsd $dst, $src" %} 1229 ins_cost(150); 1230 ins_encode %{ 1231 __ mulsd($dst$$XMMRegister, $src$$Address); 1232 %} 1233 ins_pipe(pipe_slow); 1234 %} 1235 1236 instruct mulD_imm(regD dst, immD con) %{ 1237 predicate((UseSSE>=2) && (UseAVX == 0)); 1238 match(Set dst (MulD dst con)); 1239 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: 
double=$con" %} 1240 ins_cost(150); 1241 ins_encode %{ 1242 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 1243 %} 1244 ins_pipe(pipe_slow); 1245 %} 1246 1247 instruct vmulD_reg(regD dst, regD src1, regD src2) %{ 1248 predicate(UseAVX > 0); 1249 match(Set dst (MulD src1 src2)); 1250 1251 format %{ "vmulsd $dst, $src1, $src2" %} 1252 ins_cost(150); 1253 ins_encode %{ 1254 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1255 %} 1256 ins_pipe(pipe_slow); 1257 %} 1258 1259 instruct vmulD_mem(regD dst, regD src1, memory src2) %{ 1260 predicate(UseAVX > 0); 1261 match(Set dst (MulD src1 (LoadD src2))); 1262 1263 format %{ "vmulsd $dst, $src1, $src2" %} 1264 ins_cost(150); 1265 ins_encode %{ 1266 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1267 %} 1268 ins_pipe(pipe_slow); 1269 %} 1270 1271 instruct vmulD_imm(regD dst, regD src, immD con) %{ 1272 predicate(UseAVX > 0); 1273 match(Set dst (MulD src con)); 1274 1275 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1276 ins_cost(150); 1277 ins_encode %{ 1278 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1279 %} 1280 ins_pipe(pipe_slow); 1281 %} 1282 1283 instruct divF_reg(regF dst, regF src) %{ 1284 predicate((UseSSE>=1) && (UseAVX == 0)); 1285 match(Set dst (DivF dst src)); 1286 1287 format %{ "divss $dst, $src" %} 1288 ins_cost(150); 1289 ins_encode %{ 1290 __ divss($dst$$XMMRegister, $src$$XMMRegister); 1291 %} 1292 ins_pipe(pipe_slow); 1293 %} 1294 1295 instruct divF_mem(regF dst, memory src) %{ 1296 predicate((UseSSE>=1) && (UseAVX == 0)); 1297 match(Set dst (DivF dst (LoadF src))); 1298 1299 format %{ "divss $dst, $src" %} 1300 ins_cost(150); 1301 ins_encode %{ 1302 __ divss($dst$$XMMRegister, $src$$Address); 1303 %} 1304 ins_pipe(pipe_slow); 1305 %} 1306 1307 instruct divF_imm(regF dst, immF con) %{ 1308 predicate((UseSSE>=1) && (UseAVX == 0)); 1309 match(Set dst (DivF dst con)); 1310 format %{ 
"divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1311 ins_cost(150); 1312 ins_encode %{ 1313 __ divss($dst$$XMMRegister, $constantaddress($con)); 1314 %} 1315 ins_pipe(pipe_slow); 1316 %} 1317 1318 instruct vdivF_reg(regF dst, regF src1, regF src2) %{ 1319 predicate(UseAVX > 0); 1320 match(Set dst (DivF src1 src2)); 1321 1322 format %{ "vdivss $dst, $src1, $src2" %} 1323 ins_cost(150); 1324 ins_encode %{ 1325 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1326 %} 1327 ins_pipe(pipe_slow); 1328 %} 1329 1330 instruct vdivF_mem(regF dst, regF src1, memory src2) %{ 1331 predicate(UseAVX > 0); 1332 match(Set dst (DivF src1 (LoadF src2))); 1333 1334 format %{ "vdivss $dst, $src1, $src2" %} 1335 ins_cost(150); 1336 ins_encode %{ 1337 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1338 %} 1339 ins_pipe(pipe_slow); 1340 %} 1341 1342 instruct vdivF_imm(regF dst, regF src, immF con) %{ 1343 predicate(UseAVX > 0); 1344 match(Set dst (DivF src con)); 1345 1346 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1347 ins_cost(150); 1348 ins_encode %{ 1349 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1350 %} 1351 ins_pipe(pipe_slow); 1352 %} 1353 1354 instruct divD_reg(regD dst, regD src) %{ 1355 predicate((UseSSE>=2) && (UseAVX == 0)); 1356 match(Set dst (DivD dst src)); 1357 1358 format %{ "divsd $dst, $src" %} 1359 ins_cost(150); 1360 ins_encode %{ 1361 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 1362 %} 1363 ins_pipe(pipe_slow); 1364 %} 1365 1366 instruct divD_mem(regD dst, memory src) %{ 1367 predicate((UseSSE>=2) && (UseAVX == 0)); 1368 match(Set dst (DivD dst (LoadD src))); 1369 1370 format %{ "divsd $dst, $src" %} 1371 ins_cost(150); 1372 ins_encode %{ 1373 __ divsd($dst$$XMMRegister, $src$$Address); 1374 %} 1375 ins_pipe(pipe_slow); 1376 %} 1377 1378 instruct divD_imm(regD dst, immD con) %{ 1379 predicate((UseSSE>=2) && (UseAVX == 
0)); 1380 match(Set dst (DivD dst con)); 1381 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1382 ins_cost(150); 1383 ins_encode %{ 1384 __ divsd($dst$$XMMRegister, $constantaddress($con)); 1385 %} 1386 ins_pipe(pipe_slow); 1387 %} 1388 1389 instruct vdivD_reg(regD dst, regD src1, regD src2) %{ 1390 predicate(UseAVX > 0); 1391 match(Set dst (DivD src1 src2)); 1392 1393 format %{ "vdivsd $dst, $src1, $src2" %} 1394 ins_cost(150); 1395 ins_encode %{ 1396 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1397 %} 1398 ins_pipe(pipe_slow); 1399 %} 1400 1401 instruct vdivD_mem(regD dst, regD src1, memory src2) %{ 1402 predicate(UseAVX > 0); 1403 match(Set dst (DivD src1 (LoadD src2))); 1404 1405 format %{ "vdivsd $dst, $src1, $src2" %} 1406 ins_cost(150); 1407 ins_encode %{ 1408 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1409 %} 1410 ins_pipe(pipe_slow); 1411 %} 1412 1413 instruct vdivD_imm(regD dst, regD src, immD con) %{ 1414 predicate(UseAVX > 0); 1415 match(Set dst (DivD src con)); 1416 1417 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1418 ins_cost(150); 1419 ins_encode %{ 1420 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1421 %} 1422 ins_pipe(pipe_slow); 1423 %} 1424 1425 instruct absF_reg(regF dst) %{ 1426 predicate((UseSSE>=1) && (UseAVX == 0)); 1427 match(Set dst (AbsF dst)); 1428 ins_cost(150); 1429 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1430 ins_encode %{ 1431 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1432 %} 1433 ins_pipe(pipe_slow); 1434 %} 1435 1436 instruct vabsF_reg(regF dst, regF src) %{ 1437 predicate(UseAVX > 0); 1438 match(Set dst (AbsF src)); 1439 ins_cost(150); 1440 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 1441 ins_encode %{ 1442 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1443 
ExternalAddress(float_signmask())); 1444 %} 1445 ins_pipe(pipe_slow); 1446 %} 1447 1448 instruct absD_reg(regD dst) %{ 1449 predicate((UseSSE>=2) && (UseAVX == 0)); 1450 match(Set dst (AbsD dst)); 1451 ins_cost(150); 1452 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1453 "# abs double by sign masking" %} 1454 ins_encode %{ 1455 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1456 %} 1457 ins_pipe(pipe_slow); 1458 %} 1459 1460 instruct vabsD_reg(regD dst, regD src) %{ 1461 predicate(UseAVX > 0); 1462 match(Set dst (AbsD src)); 1463 ins_cost(150); 1464 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1465 "# abs double by sign masking" %} 1466 ins_encode %{ 1467 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1468 ExternalAddress(double_signmask())); 1469 %} 1470 ins_pipe(pipe_slow); 1471 %} 1472 1473 instruct negF_reg(regF dst) %{ 1474 predicate((UseSSE>=1) && (UseAVX == 0)); 1475 match(Set dst (NegF dst)); 1476 ins_cost(150); 1477 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1478 ins_encode %{ 1479 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1480 %} 1481 ins_pipe(pipe_slow); 1482 %} 1483 1484 instruct vnegF_reg(regF dst, regF src) %{ 1485 predicate(UseAVX > 0); 1486 match(Set dst (NegF src)); 1487 ins_cost(150); 1488 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1489 ins_encode %{ 1490 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1491 ExternalAddress(float_signflip())); 1492 %} 1493 ins_pipe(pipe_slow); 1494 %} 1495 1496 instruct negD_reg(regD dst) %{ 1497 predicate((UseSSE>=2) && (UseAVX == 0)); 1498 match(Set dst (NegD dst)); 1499 ins_cost(150); 1500 format %{ "xorpd $dst, [0x8000000000000000]\t" 1501 "# neg double by sign flipping" %} 1502 ins_encode %{ 1503 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1504 %} 1505 ins_pipe(pipe_slow); 1506 %} 1507 1508 instruct vnegD_reg(regD dst, regD src) %{ 1509 predicate(UseAVX > 0); 1510 match(Set dst 
(NegD src)); 1511 ins_cost(150); 1512 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1513 "# neg double by sign flipping" %} 1514 ins_encode %{ 1515 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1516 ExternalAddress(double_signflip())); 1517 %} 1518 ins_pipe(pipe_slow); 1519 %} 1520 1521 instruct sqrtF_reg(regF dst, regF src) %{ 1522 predicate(UseSSE>=1); 1523 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1524 1525 format %{ "sqrtss $dst, $src" %} 1526 ins_cost(150); 1527 ins_encode %{ 1528 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1529 %} 1530 ins_pipe(pipe_slow); 1531 %} 1532 1533 instruct sqrtF_mem(regF dst, memory src) %{ 1534 predicate(UseSSE>=1); 1535 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 1536 1537 format %{ "sqrtss $dst, $src" %} 1538 ins_cost(150); 1539 ins_encode %{ 1540 __ sqrtss($dst$$XMMRegister, $src$$Address); 1541 %} 1542 ins_pipe(pipe_slow); 1543 %} 1544 1545 instruct sqrtF_imm(regF dst, immF con) %{ 1546 predicate(UseSSE>=1); 1547 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 1548 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1549 ins_cost(150); 1550 ins_encode %{ 1551 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 1552 %} 1553 ins_pipe(pipe_slow); 1554 %} 1555 1556 instruct sqrtD_reg(regD dst, regD src) %{ 1557 predicate(UseSSE>=2); 1558 match(Set dst (SqrtD src)); 1559 1560 format %{ "sqrtsd $dst, $src" %} 1561 ins_cost(150); 1562 ins_encode %{ 1563 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 1564 %} 1565 ins_pipe(pipe_slow); 1566 %} 1567 1568 instruct sqrtD_mem(regD dst, memory src) %{ 1569 predicate(UseSSE>=2); 1570 match(Set dst (SqrtD (LoadD src))); 1571 1572 format %{ "sqrtsd $dst, $src" %} 1573 ins_cost(150); 1574 ins_encode %{ 1575 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1576 %} 1577 ins_pipe(pipe_slow); 1578 %} 1579 1580 instruct sqrtD_imm(regD dst, immD con) %{ 1581 predicate(UseSSE>=2); 1582 match(Set dst (SqrtD con)); 1583 format %{ "sqrtsd 
$dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}


// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long); vmovdqu requires AVX (VEX encoding).
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte store; vmovdqu requires AVX (VEX encoding).
instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar to be vector
// (movd then byte-unpack + shuffle broadcasts the low byte; continues below)
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t!
replicate4B" %} 1694 ins_encode %{ 1695 __ movdl($dst$$XMMRegister, $src$$Register); 1696 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1697 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1698 %} 1699 ins_pipe( pipe_slow ); 1700 %} 1701 1702 instruct Repl8B(vecD dst, rRegI src) %{ 1703 predicate(n->as_Vector()->length() == 8); 1704 match(Set dst (ReplicateB src)); 1705 format %{ "movd $dst,$src\n\t" 1706 "punpcklbw $dst,$dst\n\t" 1707 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1708 ins_encode %{ 1709 __ movdl($dst$$XMMRegister, $src$$Register); 1710 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1711 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1712 %} 1713 ins_pipe( pipe_slow ); 1714 %} 1715 1716 instruct Repl16B(vecX dst, rRegI src) %{ 1717 predicate(n->as_Vector()->length() == 16); 1718 match(Set dst (ReplicateB src)); 1719 format %{ "movd $dst,$src\n\t" 1720 "punpcklbw $dst,$dst\n\t" 1721 "pshuflw $dst,$dst,0x00\n\t" 1722 "movlhps $dst,$dst\t! replicate16B" %} 1723 ins_encode %{ 1724 __ movdl($dst$$XMMRegister, $src$$Register); 1725 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1726 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1727 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1728 %} 1729 ins_pipe( pipe_slow ); 1730 %} 1731 1732 instruct Repl32B(vecY dst, rRegI src) %{ 1733 predicate(n->as_Vector()->length() == 32); 1734 match(Set dst (ReplicateB src)); 1735 format %{ "movd $dst,$src\n\t" 1736 "punpcklbw $dst,$dst\n\t" 1737 "pshuflw $dst,$dst,0x00\n\t" 1738 "movlhps $dst,$dst\n\t" 1739 "vinsertf128h $dst,$dst,$dst\t! 
replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
// The immediate is pre-replicated by replicate4_imm/replicate8_imm so a single
// 4- or 8-byte constant-table load fills the low lane(s).
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  // Format text fixed: previously read "lreplicate32B" (stray leading 'l'),
  // inconsistent with the sibling Repl4B/8B/16B_imm patterns.
  format %{ "movsd $dst,[$constantaddress]\t! replicate32B($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
// (pxor dst,dst zeroes the register regardless of vector length <= 16 bytes)
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
1834 bool vector256 = true; 1835 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1836 %} 1837 ins_pipe( fpu_reg_reg ); 1838 %} 1839 1840 // Replicate short (2 byte) scalar to be vector 1841 instruct Repl2S(vecS dst, rRegI src) %{ 1842 predicate(n->as_Vector()->length() == 2); 1843 match(Set dst (ReplicateS src)); 1844 format %{ "movd $dst,$src\n\t" 1845 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1846 ins_encode %{ 1847 __ movdl($dst$$XMMRegister, $src$$Register); 1848 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1849 %} 1850 ins_pipe( fpu_reg_reg ); 1851 %} 1852 1853 instruct Repl4S(vecD dst, rRegI src) %{ 1854 predicate(n->as_Vector()->length() == 4); 1855 match(Set dst (ReplicateS src)); 1856 format %{ "movd $dst,$src\n\t" 1857 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 1858 ins_encode %{ 1859 __ movdl($dst$$XMMRegister, $src$$Register); 1860 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1861 %} 1862 ins_pipe( fpu_reg_reg ); 1863 %} 1864 1865 instruct Repl8S(vecX dst, rRegI src) %{ 1866 predicate(n->as_Vector()->length() == 8); 1867 match(Set dst (ReplicateS src)); 1868 format %{ "movd $dst,$src\n\t" 1869 "pshuflw $dst,$dst,0x00\n\t" 1870 "movlhps $dst,$dst\t! replicate8S" %} 1871 ins_encode %{ 1872 __ movdl($dst$$XMMRegister, $src$$Register); 1873 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1874 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1875 %} 1876 ins_pipe( pipe_slow ); 1877 %} 1878 1879 instruct Repl16S(vecY dst, rRegI src) %{ 1880 predicate(n->as_Vector()->length() == 16); 1881 match(Set dst (ReplicateS src)); 1882 format %{ "movd $dst,$src\n\t" 1883 "pshuflw $dst,$dst,0x00\n\t" 1884 "movlhps $dst,$dst\n\t" 1885 "vinsertf128h $dst,$dst,$dst\t! 
replicate16S" %} 1886 ins_encode %{ 1887 __ movdl($dst$$XMMRegister, $src$$Register); 1888 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1889 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1890 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1891 %} 1892 ins_pipe( pipe_slow ); 1893 %} 1894 1895 // Replicate short (2 byte) scalar immediate to be vector by loading from const table. 1896 instruct Repl2S_imm(vecS dst, immI con) %{ 1897 predicate(n->as_Vector()->length() == 2); 1898 match(Set dst (ReplicateS con)); 1899 format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %} 1900 ins_encode %{ 1901 __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 1902 %} 1903 ins_pipe( fpu_reg_reg ); 1904 %} 1905 1906 instruct Repl4S_imm(vecD dst, immI con) %{ 1907 predicate(n->as_Vector()->length() == 4); 1908 match(Set dst (ReplicateS con)); 1909 format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %} 1910 ins_encode %{ 1911 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1912 %} 1913 ins_pipe( fpu_reg_reg ); 1914 %} 1915 1916 instruct Repl8S_imm(vecX dst, immI con) %{ 1917 predicate(n->as_Vector()->length() == 8); 1918 match(Set dst (ReplicateS con)); 1919 format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t" 1920 "movlhps $dst,$dst" %} 1921 ins_encode %{ 1922 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1923 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1924 %} 1925 ins_pipe( pipe_slow ); 1926 %} 1927 1928 instruct Repl16S_imm(vecY dst, immI con) %{ 1929 predicate(n->as_Vector()->length() == 16); 1930 match(Set dst (ReplicateS con)); 1931 format %{ "movsd $dst,[$constantaddress]\t! 
replicate16S($con)\n\t" 1932 "movlhps $dst,$dst\n\t" 1933 "vinsertf128h $dst,$dst,$dst" %} 1934 ins_encode %{ 1935 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1936 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1937 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1938 %} 1939 ins_pipe( pipe_slow ); 1940 %} 1941 1942 // Replicate short (2 byte) scalar zero to be vector 1943 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 1944 predicate(n->as_Vector()->length() == 2); 1945 match(Set dst (ReplicateS zero)); 1946 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 1947 ins_encode %{ 1948 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1949 %} 1950 ins_pipe( fpu_reg_reg ); 1951 %} 1952 1953 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 1954 predicate(n->as_Vector()->length() == 4); 1955 match(Set dst (ReplicateS zero)); 1956 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 1957 ins_encode %{ 1958 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1959 %} 1960 ins_pipe( fpu_reg_reg ); 1961 %} 1962 1963 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 1964 predicate(n->as_Vector()->length() == 8); 1965 match(Set dst (ReplicateS zero)); 1966 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 1967 ins_encode %{ 1968 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1969 %} 1970 ins_pipe( fpu_reg_reg ); 1971 %} 1972 1973 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 1974 predicate(n->as_Vector()->length() == 16); 1975 match(Set dst (ReplicateS zero)); 1976 format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %} 1977 ins_encode %{ 1978 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
1979 bool vector256 = true; 1980 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1981 %} 1982 ins_pipe( fpu_reg_reg ); 1983 %} 1984 1985 // Replicate char (2 byte) scalar to be vector 1986 instruct Repl2C(vecS dst, rRegI src) %{ 1987 predicate(n->as_Vector()->length() == 2); 1988 match(Set dst (ReplicateC src)); 1989 format %{ "movd $dst,$src\n\t" 1990 "pshuflw $dst,$dst,0x00\t! replicate2C" %} 1991 ins_encode %{ 1992 __ movdl($dst$$XMMRegister, $src$$Register); 1993 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1994 %} 1995 ins_pipe( fpu_reg_reg ); 1996 %} 1997 1998 instruct Repl4C(vecD dst, rRegI src) %{ 1999 predicate(n->as_Vector()->length() == 4); 2000 match(Set dst (ReplicateC src)); 2001 format %{ "movd $dst,$src\n\t" 2002 "pshuflw $dst,$dst,0x00\t! replicate4C" %} 2003 ins_encode %{ 2004 __ movdl($dst$$XMMRegister, $src$$Register); 2005 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2006 %} 2007 ins_pipe( fpu_reg_reg ); 2008 %} 2009 2010 instruct Repl8C(vecX dst, rRegI src) %{ 2011 predicate(n->as_Vector()->length() == 8); 2012 match(Set dst (ReplicateC src)); 2013 format %{ "movd $dst,$src\n\t" 2014 "pshuflw $dst,$dst,0x00\n\t" 2015 "movlhps $dst,$dst\t! replicate8C" %} 2016 ins_encode %{ 2017 __ movdl($dst$$XMMRegister, $src$$Register); 2018 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2019 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2020 %} 2021 ins_pipe( pipe_slow ); 2022 %} 2023 2024 instruct Repl16C(vecY dst, rRegI src) %{ 2025 predicate(n->as_Vector()->length() == 16); 2026 match(Set dst (ReplicateC src)); 2027 format %{ "movd $dst,$src\n\t" 2028 "pshuflw $dst,$dst,0x00\n\t" 2029 "movlhps $dst,$dst\n\t" 2030 "vinsertf128h $dst,$dst,$dst\t! 
replicate16C" %} 2031 ins_encode %{ 2032 __ movdl($dst$$XMMRegister, $src$$Register); 2033 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2034 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2035 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2036 %} 2037 ins_pipe( pipe_slow ); 2038 %} 2039 2040 // Replicate char (2 byte) scalar immediate to be vector by loading from const table. 2041 instruct Repl2C_imm(vecS dst, immI con) %{ 2042 predicate(n->as_Vector()->length() == 2); 2043 match(Set dst (ReplicateC con)); 2044 format %{ "movss $dst,[$constantaddress]\t! replicate2C($con)" %} 2045 ins_encode %{ 2046 __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 2047 %} 2048 ins_pipe( fpu_reg_reg ); 2049 %} 2050 2051 instruct Repl4C_imm(vecD dst, immI con) %{ 2052 predicate(n->as_Vector()->length() == 4); 2053 match(Set dst (ReplicateC con)); 2054 format %{ "movsd $dst,[$constantaddress]\t! replicate4C($con)" %} 2055 ins_encode %{ 2056 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2057 %} 2058 ins_pipe( fpu_reg_reg ); 2059 %} 2060 2061 instruct Repl8C_imm(vecX dst, immI con) %{ 2062 predicate(n->as_Vector()->length() == 8); 2063 match(Set dst (ReplicateC con)); 2064 format %{ "movsd $dst,[$constantaddress]\t! replicate8C($con)\n\t" 2065 "movlhps $dst,$dst" %} 2066 ins_encode %{ 2067 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2068 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2069 %} 2070 ins_pipe( pipe_slow ); 2071 %} 2072 2073 instruct Repl16C_imm(vecY dst, immI con) %{ 2074 predicate(n->as_Vector()->length() == 16); 2075 match(Set dst (ReplicateC con)); 2076 format %{ "movsd $dst,[$constantaddress]\t! 
replicate16C($con)\n\t" 2077 "movlhps $dst,$dst\n\t" 2078 "vinsertf128h $dst,$dst,$dst" %} 2079 ins_encode %{ 2080 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2081 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2082 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2083 %} 2084 ins_pipe( pipe_slow ); 2085 %} 2086 2087 // Replicate char (2 byte) scalar zero to be vector 2088 instruct Repl2C_zero(vecS dst, immI0 zero) %{ 2089 predicate(n->as_Vector()->length() == 2); 2090 match(Set dst (ReplicateC zero)); 2091 format %{ "pxor $dst,$dst\t! replicate2C zero" %} 2092 ins_encode %{ 2093 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2094 %} 2095 ins_pipe( fpu_reg_reg ); 2096 %} 2097 2098 instruct Repl4C_zero(vecD dst, immI0 zero) %{ 2099 predicate(n->as_Vector()->length() == 4); 2100 match(Set dst (ReplicateC zero)); 2101 format %{ "pxor $dst,$dst\t! replicate4C zero" %} 2102 ins_encode %{ 2103 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2104 %} 2105 ins_pipe( fpu_reg_reg ); 2106 %} 2107 2108 instruct Repl8C_zero(vecX dst, immI0 zero) %{ 2109 predicate(n->as_Vector()->length() == 8); 2110 match(Set dst (ReplicateC zero)); 2111 format %{ "pxor $dst,$dst\t! replicate8C zero" %} 2112 ins_encode %{ 2113 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2114 %} 2115 ins_pipe( fpu_reg_reg ); 2116 %} 2117 2118 instruct Repl16C_zero(vecY dst, immI0 zero) %{ 2119 predicate(n->as_Vector()->length() == 16); 2120 match(Set dst (ReplicateC zero)); 2121 format %{ "vxorpd $dst,$dst,$dst\t! replicate16C zero" %} 2122 ins_encode %{ 2123 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2124 bool vector256 = true; 2125 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2126 %} 2127 ins_pipe( fpu_reg_reg ); 2128 %} 2129 2130 // Replicate integer (4 byte) scalar to be vector 2131 instruct Repl2I(vecD dst, rRegI src) %{ 2132 predicate(n->as_Vector()->length() == 2); 2133 match(Set dst (ReplicateI src)); 2134 format %{ "movd $dst,$src\n\t" 2135 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2136 ins_encode %{ 2137 __ movdl($dst$$XMMRegister, $src$$Register); 2138 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2139 %} 2140 ins_pipe( fpu_reg_reg ); 2141 %} 2142 2143 instruct Repl4I(vecX dst, rRegI src) %{ 2144 predicate(n->as_Vector()->length() == 4); 2145 match(Set dst (ReplicateI src)); 2146 format %{ "movd $dst,$src\n\t" 2147 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2148 ins_encode %{ 2149 __ movdl($dst$$XMMRegister, $src$$Register); 2150 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2151 %} 2152 ins_pipe( pipe_slow ); 2153 %} 2154 2155 instruct Repl8I(vecY dst, rRegI src) %{ 2156 predicate(n->as_Vector()->length() == 8); 2157 match(Set dst (ReplicateI src)); 2158 format %{ "movd $dst,$src\n\t" 2159 "pshufd $dst,$dst,0x00\n\t" 2160 "vinsertf128h $dst,$dst,$dst\t! replicate8I" %} 2161 ins_encode %{ 2162 __ movdl($dst$$XMMRegister, $src$$Register); 2163 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2164 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2165 %} 2166 ins_pipe( pipe_slow ); 2167 %} 2168 2169 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2170 instruct Repl2I_imm(vecD dst, immI con) %{ 2171 predicate(n->as_Vector()->length() == 2); 2172 match(Set dst (ReplicateI con)); 2173 format %{ "movsd $dst,[$constantaddress]\t! 
replicate2I($con)" %} 2174 ins_encode %{ 2175 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2176 %} 2177 ins_pipe( fpu_reg_reg ); 2178 %} 2179 2180 instruct Repl4I_imm(vecX dst, immI con) %{ 2181 predicate(n->as_Vector()->length() == 4); 2182 match(Set dst (ReplicateI con)); 2183 format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2184 "movlhps $dst,$dst" %} 2185 ins_encode %{ 2186 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2187 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2188 %} 2189 ins_pipe( pipe_slow ); 2190 %} 2191 2192 instruct Repl8I_imm(vecY dst, immI con) %{ 2193 predicate(n->as_Vector()->length() == 8); 2194 match(Set dst (ReplicateI con)); 2195 format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2196 "movlhps $dst,$dst\n\t" 2197 "vinsertf128h $dst,$dst,$dst" %} 2198 ins_encode %{ 2199 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2200 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2201 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2202 %} 2203 ins_pipe( pipe_slow ); 2204 %} 2205 2206 // Integer could be loaded into xmm register directly from memory. 2207 instruct Repl2I_mem(vecD dst, memory mem) %{ 2208 predicate(n->as_Vector()->length() == 2); 2209 match(Set dst (ReplicateI mem)); 2210 format %{ "movd $dst,$mem\n\t" 2211 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2212 ins_encode %{ 2213 __ movdl($dst$$XMMRegister, $mem$$Address); 2214 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2215 %} 2216 ins_pipe( fpu_reg_reg ); 2217 %} 2218 2219 instruct Repl4I_mem(vecX dst, memory mem) %{ 2220 predicate(n->as_Vector()->length() == 4); 2221 match(Set dst (ReplicateI mem)); 2222 format %{ "movd $dst,$mem\n\t" 2223 "pshufd $dst,$dst,0x00\t! 
replicate4I" %} 2224 ins_encode %{ 2225 __ movdl($dst$$XMMRegister, $mem$$Address); 2226 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2227 %} 2228 ins_pipe( pipe_slow ); 2229 %} 2230 2231 instruct Repl8I_mem(vecY dst, memory mem) %{ 2232 predicate(n->as_Vector()->length() == 8); 2233 match(Set dst (ReplicateI mem)); 2234 format %{ "movd $dst,$mem\n\t" 2235 "pshufd $dst,$dst,0x00\n\t" 2236 "vinsertf128h $dst,$dst,$dst\t! replicate8I" %} 2237 ins_encode %{ 2238 __ movdl($dst$$XMMRegister, $mem$$Address); 2239 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2240 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2241 %} 2242 ins_pipe( pipe_slow ); 2243 %} 2244 2245 // Replicate integer (4 byte) scalar zero to be vector 2246 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 2247 predicate(n->as_Vector()->length() == 2); 2248 match(Set dst (ReplicateI zero)); 2249 format %{ "pxor $dst,$dst\t! replicate2I" %} 2250 ins_encode %{ 2251 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2252 %} 2253 ins_pipe( fpu_reg_reg ); 2254 %} 2255 2256 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 2257 predicate(n->as_Vector()->length() == 4); 2258 match(Set dst (ReplicateI zero)); 2259 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 2260 ins_encode %{ 2261 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2262 %} 2263 ins_pipe( fpu_reg_reg ); 2264 %} 2265 2266 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 2267 predicate(n->as_Vector()->length() == 8); 2268 match(Set dst (ReplicateI zero)); 2269 format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %} 2270 ins_encode %{ 2271 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2272 bool vector256 = true; 2273 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2274 %} 2275 ins_pipe( fpu_reg_reg ); 2276 %} 2277 2278 // Replicate long (8 byte) scalar to be vector 2279 #ifdef _LP64 2280 instruct Repl2L(vecX dst, rRegL src) %{ 2281 predicate(n->as_Vector()->length() == 2); 2282 match(Set dst (ReplicateL src)); 2283 format %{ "movdq $dst,$src\n\t" 2284 "movlhps $dst,$dst\t! replicate2L" %} 2285 ins_encode %{ 2286 __ movdq($dst$$XMMRegister, $src$$Register); 2287 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2288 %} 2289 ins_pipe( pipe_slow ); 2290 %} 2291 2292 instruct Repl4L(vecY dst, rRegL src) %{ 2293 predicate(n->as_Vector()->length() == 4); 2294 match(Set dst (ReplicateL src)); 2295 format %{ "movdq $dst,$src\n\t" 2296 "movlhps $dst,$dst\n\t" 2297 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} 2298 ins_encode %{ 2299 __ movdq($dst$$XMMRegister, $src$$Register); 2300 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2301 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2302 %} 2303 ins_pipe( pipe_slow ); 2304 %} 2305 #else // _LP64 2306 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 2307 predicate(n->as_Vector()->length() == 2); 2308 match(Set dst (ReplicateL src)); 2309 effect(TEMP dst, USE src, TEMP tmp); 2310 format %{ "movdl $dst,$src.lo\n\t" 2311 "movdl $tmp,$src.hi\n\t" 2312 "punpckldq $dst,$tmp\n\t" 2313 "movlhps $dst,$dst\t! 
replicate2L"%} 2314 ins_encode %{ 2315 __ movdl($dst$$XMMRegister, $src$$Register); 2316 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2317 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2318 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2319 %} 2320 ins_pipe( pipe_slow ); 2321 %} 2322 2323 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2324 predicate(n->as_Vector()->length() == 4); 2325 match(Set dst (ReplicateL src)); 2326 effect(TEMP dst, USE src, TEMP tmp); 2327 format %{ "movdl $dst,$src.lo\n\t" 2328 "movdl $tmp,$src.hi\n\t" 2329 "punpckldq $dst,$tmp\n\t" 2330 "movlhps $dst,$dst\n\t" 2331 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} 2332 ins_encode %{ 2333 __ movdl($dst$$XMMRegister, $src$$Register); 2334 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2335 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2336 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2337 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2338 %} 2339 ins_pipe( pipe_slow ); 2340 %} 2341 #endif // _LP64 2342 2343 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2344 instruct Repl2L_imm(vecX dst, immL con) %{ 2345 predicate(n->as_Vector()->length() == 2); 2346 match(Set dst (ReplicateL con)); 2347 format %{ "movsd $dst,[$constantaddress]\t! replicate2L($con)\n\t" 2348 "movlhps $dst,$dst" %} 2349 ins_encode %{ 2350 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 2351 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2352 %} 2353 ins_pipe( pipe_slow ); 2354 %} 2355 2356 instruct Repl4L_imm(vecY dst, immL con) %{ 2357 predicate(n->as_Vector()->length() == 4); 2358 match(Set dst (ReplicateL con)); 2359 format %{ "movsd $dst,[$constantaddress]\t! 
replicate4L($con)\n\t" 2360 "movlhps $dst,$dst\n\t" 2361 "vinsertf128h $dst,$dst,$dst" %} 2362 ins_encode %{ 2363 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 2364 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2365 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2366 %} 2367 ins_pipe( pipe_slow ); 2368 %} 2369 2370 // Long could be loaded into xmm register directly from memory. 2371 instruct Repl2L_mem(vecX dst, memory mem) %{ 2372 predicate(n->as_Vector()->length() == 2); 2373 match(Set dst (ReplicateL mem)); 2374 format %{ "movq $dst,$mem\n\t" 2375 "movlhps $dst,$dst\t! replicate2L" %} 2376 ins_encode %{ 2377 __ movq($dst$$XMMRegister, $mem$$Address); 2378 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2379 %} 2380 ins_pipe( pipe_slow ); 2381 %} 2382 2383 instruct Repl4L_mem(vecY dst, memory mem) %{ 2384 predicate(n->as_Vector()->length() == 4); 2385 match(Set dst (ReplicateL mem)); 2386 format %{ "movq $dst,$mem\n\t" 2387 "movlhps $dst,$dst\n\t" 2388 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} 2389 ins_encode %{ 2390 __ movq($dst$$XMMRegister, $mem$$Address); 2391 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2392 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2393 %} 2394 ins_pipe( pipe_slow ); 2395 %} 2396 2397 // Replicate long (8 byte) scalar zero to be vector 2398 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 2399 predicate(n->as_Vector()->length() == 2); 2400 match(Set dst (ReplicateL zero)); 2401 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 2402 ins_encode %{ 2403 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2404 %} 2405 ins_pipe( fpu_reg_reg ); 2406 %} 2407 2408 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 2409 predicate(n->as_Vector()->length() == 4); 2410 match(Set dst (ReplicateL zero)); 2411 format %{ "vxorpd $dst,$dst,$dst\t! replicate4L zero" %} 2412 ins_encode %{ 2413 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2414 bool vector256 = true; 2415 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2416 %} 2417 ins_pipe( fpu_reg_reg ); 2418 %} 2419 2420 // Replicate float (4 byte) scalar to be vector 2421 instruct Repl2F(vecD dst, regF src) %{ 2422 predicate(n->as_Vector()->length() == 2); 2423 match(Set dst (ReplicateF src)); 2424 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 2425 ins_encode %{ 2426 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2427 %} 2428 ins_pipe( fpu_reg_reg ); 2429 %} 2430 2431 instruct Repl4F(vecX dst, regF src) %{ 2432 predicate(n->as_Vector()->length() == 4); 2433 match(Set dst (ReplicateF src)); 2434 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 2435 ins_encode %{ 2436 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2437 %} 2438 ins_pipe( pipe_slow ); 2439 %} 2440 2441 instruct Repl8F(vecY dst, regF src) %{ 2442 predicate(n->as_Vector()->length() == 8); 2443 match(Set dst (ReplicateF src)); 2444 format %{ "pshufd $dst,$src,0x00\n\t" 2445 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 2446 ins_encode %{ 2447 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2448 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2449 %} 2450 ins_pipe( pipe_slow ); 2451 %} 2452 2453 // Replicate float (4 byte) scalar zero to be vector 2454 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 2455 predicate(n->as_Vector()->length() == 2); 2456 match(Set dst (ReplicateF zero)); 2457 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 2458 ins_encode %{ 2459 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2460 %} 2461 ins_pipe( fpu_reg_reg ); 2462 %} 2463 2464 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 2465 predicate(n->as_Vector()->length() == 4); 2466 match(Set dst (ReplicateF zero)); 2467 format %{ "xorps $dst,$dst\t! 
replicate4F zero" %} 2468 ins_encode %{ 2469 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2470 %} 2471 ins_pipe( fpu_reg_reg ); 2472 %} 2473 2474 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 2475 predicate(n->as_Vector()->length() == 8); 2476 match(Set dst (ReplicateF zero)); 2477 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 2478 ins_encode %{ 2479 bool vector256 = true; 2480 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2481 %} 2482 ins_pipe( fpu_reg_reg ); 2483 %} 2484 2485 // Replicate double (8 bytes) scalar to be vector 2486 instruct Repl2D(vecX dst, regD src) %{ 2487 predicate(n->as_Vector()->length() == 2); 2488 match(Set dst (ReplicateD src)); 2489 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 2490 ins_encode %{ 2491 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2492 %} 2493 ins_pipe( pipe_slow ); 2494 %} 2495 2496 instruct Repl4D(vecY dst, regD src) %{ 2497 predicate(n->as_Vector()->length() == 4); 2498 match(Set dst (ReplicateD src)); 2499 format %{ "pshufd $dst,$src,0x44\n\t" 2500 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 2501 ins_encode %{ 2502 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2503 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2504 %} 2505 ins_pipe( pipe_slow ); 2506 %} 2507 2508 // Replicate double (8 byte) scalar zero to be vector 2509 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 2510 predicate(n->as_Vector()->length() == 2); 2511 match(Set dst (ReplicateD zero)); 2512 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 2513 ins_encode %{ 2514 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 2515 %} 2516 ins_pipe( fpu_reg_reg ); 2517 %} 2518 2519 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 2520 predicate(n->as_Vector()->length() == 4); 2521 match(Set dst (ReplicateD zero)); 2522 format %{ "vxorpd $dst,$dst,$dst,vect256\t! 
replicate4D zero" %} 2523 ins_encode %{ 2524 bool vector256 = true; 2525 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2526 %} 2527 ins_pipe( fpu_reg_reg ); 2528 %} 2529