// method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI: No register preserved across function calls
//   XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//   XMM0-XMM3 might hold parameters

// Each 32-bit word of an XMM register gets its own reg_def so the allocator
// can track float (word a), double (words a-b) and vector lifetimes
// independently. First flag is the calling-convention class at call sites,
// second at method entry: (SOC, SOC) = caller-saved everywhere;
// (SOC, SOE) = callee-saved (save-on-entry), used below for the Windows ABI.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _WIN64

// Windows x64 ABI: XMM6-XMM15 are callee-saved (SOE).
reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#else // _WIN64

// Non-Windows ABI: all XMM registers are caller-saved (SOC, SOC).
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _LP64

// XMM8-XMM15 exist only in 64-bit mode (require REX/VEX encoding).
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#endif // _LP64

#endif // _WIN64

// Condition-code (flags) register. It has no VMReg (VMRegImpl::Bad()), so it
// is never spilled; only the encoding number differs between 64- and 32-bit.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class
chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
       XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
       XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
       XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
       XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
       XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
       XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
       XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
// NOTE(review): original lines 332-871 (remainder of this alloc_class, the
// register classes, and the addF_reg/addF_mem instruct headers) are not
// present in this chunk of the file — the fragment below is the tail of
// instruct addF_mem.

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Scalar FP arithmetic. For each op: the two-operand SSE forms (*_reg,
// *_mem, *_imm) are selected when UseAVX == 0; the three-operand VEX forms
// (v*_reg, v*_mem, v*_imm) when UseAVX > 0. The *_imm forms read the
// constant operand from the constant table ($constantaddress).

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddF_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddF_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  // NOTE(review): the tail of addD_reg and the head of addD_mem (original
  // lines 937-942) were elided in extraction; reconstructed from the file's
  // uniform *_reg/*_mem pattern — verify against upstream.
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddD_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddD_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddD_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  // NOTE(review): original lines 1008-1013 elided in extraction;
  // reconstructed — verify against upstream.
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubF_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubF_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  // NOTE(review): original lines 1079-1084 elided in extraction;
  // reconstructed — verify against upstream.
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubD_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubD_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubD_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  // NOTE(review): original lines 1150-1155 elided in extraction;
  // reconstructed — verify against upstream.
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulF_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulF_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  // NOTE(review): original lines 1221-1226 elided in extraction;
  // reconstructed — verify against upstream.
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulD_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulD_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulD_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  // NOTE(review): original lines 1292-1297 elided in extraction;
  // reconstructed — verify against upstream.
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivF_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivF_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  // NOTE(review): original lines 1363-1368 elided in extraction;
  // reconstructed — verify against upstream.
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivD_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivD_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivD_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Abs/Neg are implemented by masking/flipping the IEEE-754 sign bit with a
// constant from memory (float_signmask/double_signmask/..._signflip).

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsF_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsD_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vnegF_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vnegD_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vxorpd($dst$$XMMRegister,
$src$$XMMRegister, 1516 ExternalAddress(double_signflip())); 1517 %} 1518 ins_pipe(pipe_slow); 1519 %} 1520 1521 instruct sqrtF_reg(regF dst, regF src) %{ 1522 predicate(UseSSE>=1); 1523 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1524 1525 format %{ "sqrtss $dst, $src" %} 1526 ins_cost(150); 1527 ins_encode %{ 1528 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1702 instruct Repl8B(vecD dst, rRegI src) %{ 1703 predicate(n->as_Vector()->length() == 8); 1704 match(Set dst (ReplicateB src)); 1705 format %{ "movd $dst,$src\n\t" 1706 "punpcklbw $dst,$dst\n\t" 1707 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1708 ins_encode %{ 1709 __ movdl($dst$$XMMRegister, $src$$Register); 1710 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1711 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1712 %} 1713 ins_pipe( pipe_slow ); 1714 %} 1715 1716 instruct Repl16B(vecX dst, rRegI src) %{ 1717 predicate(n->as_Vector()->length() == 16); 1718 match(Set dst (ReplicateB src)); 1719 format %{ "movd $dst,$src\n\t" 1720 "punpcklbw $dst,$dst\n\t" 1721 "pshuflw $dst,$dst,0x00\n\t" 1722 "movlhps $dst,$dst\t! replicate16B" %} 1723 ins_encode %{ 1724 __ movdl($dst$$XMMRegister, $src$$Register); 1725 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1726 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1727 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1728 %} 1729 ins_pipe( pipe_slow ); 1730 %} 1731 1732 instruct Repl32B(vecY dst, rRegI src) %{ 1733 predicate(n->as_Vector()->length() == 32); 1734 match(Set dst (ReplicateB src)); 1735 format %{ "movd $dst,$src\n\t" 1736 "punpcklbw $dst,$dst\n\t" 1737 "pshuflw $dst,$dst,0x00\n\t" 1738 "movlhps $dst,$dst\n\t" 1739 "vinsertf128h $dst,$dst,$dst\t! 
replicate32B" %} 1740 ins_encode %{ 1741 __ movdl($dst$$XMMRegister, $src$$Register); 1742 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1743 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1744 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1745 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1746 %} 1747 ins_pipe( pipe_slow ); 1748 %} 1749 1750 // Replicate byte scalar immediate to be vector by loading from const table. 1751 instruct Repl4B_imm(vecS dst, immI con) %{ 1752 predicate(n->as_Vector()->length() == 4); 1753 match(Set dst (ReplicateB con)); 1754 format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %} 1755 ins_encode %{ 1756 __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1757 %} 1758 ins_pipe( pipe_slow ); 1759 %} 1760 1761 instruct Repl8B_imm(vecD dst, immI con) %{ 1762 predicate(n->as_Vector()->length() == 8); 1763 match(Set dst (ReplicateB con)); 1764 format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %} 1765 ins_encode %{ 1766 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1767 %} 1768 ins_pipe( pipe_slow ); 1769 %} 1770 1771 instruct Repl16B_imm(vecX dst, immI con) %{ 1772 predicate(n->as_Vector()->length() == 16); 1773 match(Set dst (ReplicateB con)); 1774 format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t" 1775 "movlhps $dst,$dst" %} 1776 ins_encode %{ 1777 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1778 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1779 %} 1780 ins_pipe( pipe_slow ); 1781 %} 1782 1783 instruct Repl32B_imm(vecY dst, immI con) %{ 1784 predicate(n->as_Vector()->length() == 32); 1785 match(Set dst (ReplicateB con)); 1786 format %{ "movsd $dst,[$constantaddress]\t! 
lreplicate32B($con)\n\t" 1787 "movlhps $dst,$dst\n\t" 1788 "vinsertf128h $dst,$dst,$dst" %} 1789 ins_encode %{ 1790 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1791 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1792 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1793 %} 1794 ins_pipe( pipe_slow ); 1795 %} 1796 1797 // Replicate byte scalar zero to be vector 1798 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 1799 predicate(n->as_Vector()->length() == 4); 1800 match(Set dst (ReplicateB zero)); 1801 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 1802 ins_encode %{ 1803 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1804 %} 1805 ins_pipe( fpu_reg_reg ); 1806 %} 1807 1808 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 1809 predicate(n->as_Vector()->length() == 8); 1810 match(Set dst (ReplicateB zero)); 1811 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 1812 ins_encode %{ 1813 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1814 %} 1815 ins_pipe( fpu_reg_reg ); 1816 %} 1817 1818 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1819 predicate(n->as_Vector()->length() == 16); 1820 match(Set dst (ReplicateB zero)); 1821 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 1822 ins_encode %{ 1823 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1824 %} 1825 ins_pipe( fpu_reg_reg ); 1826 %} 1827 1828 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1829 predicate(n->as_Vector()->length() == 32); 1830 match(Set dst (ReplicateB zero)); 1831 format %{ "vxorpd $dst,$dst,$dst\t! replicate32B zero" %} 1832 ins_encode %{ 1833 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
1834 bool vector256 = true; 1835 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1836 %} 1837 ins_pipe( fpu_reg_reg ); 1838 %} 1839 1840 // Replicate char/short (2 byte) scalar to be vector 1841 instruct Repl2S(vecS dst, rRegI src) %{ 1842 predicate(n->as_Vector()->length() == 2); 1843 match(Set dst (ReplicateS src)); 1844 format %{ "movd $dst,$src\n\t" 1845 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1846 ins_encode %{ 1847 __ movdl($dst$$XMMRegister, $src$$Register); 1848 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1849 %} 1850 ins_pipe( fpu_reg_reg ); 1851 %} 1852 1853 instruct Repl4S(vecD dst, rRegI src) %{ 1854 predicate(n->as_Vector()->length() == 4); 1855 match(Set dst (ReplicateS src)); 1856 format %{ "movd $dst,$src\n\t" 1857 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 1858 ins_encode %{ 1859 __ movdl($dst$$XMMRegister, $src$$Register); 1860 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1861 %} 1862 ins_pipe( fpu_reg_reg ); 1863 %} 1864 1865 instruct Repl8S(vecX dst, rRegI src) %{ 1866 predicate(n->as_Vector()->length() == 8); 1867 match(Set dst (ReplicateS src)); 1868 format %{ "movd $dst,$src\n\t" 1869 "pshuflw $dst,$dst,0x00\n\t" 1870 "movlhps $dst,$dst\t! replicate8S" %} 1871 ins_encode %{ 1872 __ movdl($dst$$XMMRegister, $src$$Register); 1873 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1874 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1875 %} 1876 ins_pipe( pipe_slow ); 1877 %} 1878 1879 instruct Repl16S(vecY dst, rRegI src) %{ 1880 predicate(n->as_Vector()->length() == 16); 1881 match(Set dst (ReplicateS src)); 1882 format %{ "movd $dst,$src\n\t" 1883 "pshuflw $dst,$dst,0x00\n\t" 1884 "movlhps $dst,$dst\n\t" 1885 "vinsertf128h $dst,$dst,$dst\t! 
replicate16S" %} 1886 ins_encode %{ 1887 __ movdl($dst$$XMMRegister, $src$$Register); 1888 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1889 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1890 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1891 %} 1892 ins_pipe( pipe_slow ); 1893 %} 1894 1895 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 1896 instruct Repl2S_imm(vecS dst, immI con) %{ 1897 predicate(n->as_Vector()->length() == 2); 1898 match(Set dst (ReplicateS con)); 1899 format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %} 1900 ins_encode %{ 1901 __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 1902 %} 1903 ins_pipe( fpu_reg_reg ); 1904 %} 1905 1906 instruct Repl4S_imm(vecD dst, immI con) %{ 1907 predicate(n->as_Vector()->length() == 4); 1908 match(Set dst (ReplicateS con)); 1909 format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %} 1910 ins_encode %{ 1911 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1912 %} 1913 ins_pipe( fpu_reg_reg ); 1914 %} 1915 1916 instruct Repl8S_imm(vecX dst, immI con) %{ 1917 predicate(n->as_Vector()->length() == 8); 1918 match(Set dst (ReplicateS con)); 1919 format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t" 1920 "movlhps $dst,$dst" %} 1921 ins_encode %{ 1922 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1923 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1924 %} 1925 ins_pipe( pipe_slow ); 1926 %} 1927 1928 instruct Repl16S_imm(vecY dst, immI con) %{ 1929 predicate(n->as_Vector()->length() == 16); 1930 match(Set dst (ReplicateS con)); 1931 format %{ "movsd $dst,[$constantaddress]\t! 
replicate16S($con)\n\t" 1932 "movlhps $dst,$dst\n\t" 1933 "vinsertf128h $dst,$dst,$dst" %} 1934 ins_encode %{ 1935 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1936 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1937 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1938 %} 1939 ins_pipe( pipe_slow ); 1940 %} 1941 1942 // Replicate char/short (2 byte) scalar zero to be vector 1943 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 1944 predicate(n->as_Vector()->length() == 2); 1945 match(Set dst (ReplicateS zero)); 1946 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 1947 ins_encode %{ 1948 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1949 %} 1950 ins_pipe( fpu_reg_reg ); 1951 %} 1952 1953 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 1954 predicate(n->as_Vector()->length() == 4); 1955 match(Set dst (ReplicateS zero)); 1956 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 1957 ins_encode %{ 1958 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1959 %} 1960 ins_pipe( fpu_reg_reg ); 1961 %} 1962 1963 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 1964 predicate(n->as_Vector()->length() == 8); 1965 match(Set dst (ReplicateS zero)); 1966 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 1967 ins_encode %{ 1968 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1969 %} 1970 ins_pipe( fpu_reg_reg ); 1971 %} 1972 1973 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 1974 predicate(n->as_Vector()->length() == 16); 1975 match(Set dst (ReplicateS zero)); 1976 format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %} 1977 ins_encode %{ 1978 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
1979 bool vector256 = true; 1980 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1981 %} 1982 ins_pipe( fpu_reg_reg ); 1983 %} 1984 1985 // Replicate integer (4 byte) scalar to be vector 1986 instruct Repl2I(vecD dst, rRegI src) %{ 1987 predicate(n->as_Vector()->length() == 2); 1988 match(Set dst (ReplicateI src)); 1989 format %{ "movd $dst,$src\n\t" 1990 "pshufd $dst,$dst,0x00\t! replicate2I" %} 1991 ins_encode %{ 1992 __ movdl($dst$$XMMRegister, $src$$Register); 1993 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1994 %} 1995 ins_pipe( fpu_reg_reg ); 1996 %} 1997 1998 instruct Repl4I(vecX dst, rRegI src) %{ 1999 predicate(n->as_Vector()->length() == 4); 2000 match(Set dst (ReplicateI src)); 2001 format %{ "movd $dst,$src\n\t" 2002 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2003 ins_encode %{ 2004 __ movdl($dst$$XMMRegister, $src$$Register); 2005 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2006 %} 2007 ins_pipe( pipe_slow ); 2008 %} 2009 2010 instruct Repl8I(vecY dst, rRegI src) %{ 2011 predicate(n->as_Vector()->length() == 8); 2012 match(Set dst (ReplicateI src)); 2013 format %{ "movd $dst,$src\n\t" 2014 "pshufd $dst,$dst,0x00\n\t" 2015 "vinsertf128h $dst,$dst,$dst\t! replicate8I" %} 2016 ins_encode %{ 2017 __ movdl($dst$$XMMRegister, $src$$Register); 2018 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2019 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2020 %} 2021 ins_pipe( pipe_slow ); 2022 %} 2023 2024 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2025 instruct Repl2I_imm(vecD dst, immI con) %{ 2026 predicate(n->as_Vector()->length() == 2); 2027 match(Set dst (ReplicateI con)); 2028 format %{ "movsd $dst,[$constantaddress]\t! 
replicate2I($con)" %} 2029 ins_encode %{ 2030 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2031 %} 2032 ins_pipe( fpu_reg_reg ); 2033 %} 2034 2035 instruct Repl4I_imm(vecX dst, immI con) %{ 2036 predicate(n->as_Vector()->length() == 4); 2037 match(Set dst (ReplicateI con)); 2038 format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2039 "movlhps $dst,$dst" %} 2040 ins_encode %{ 2041 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2042 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2043 %} 2044 ins_pipe( pipe_slow ); 2045 %} 2046 2047 instruct Repl8I_imm(vecY dst, immI con) %{ 2048 predicate(n->as_Vector()->length() == 8); 2049 match(Set dst (ReplicateI con)); 2050 format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2051 "movlhps $dst,$dst\n\t" 2052 "vinsertf128h $dst,$dst,$dst" %} 2053 ins_encode %{ 2054 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2055 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2056 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2057 %} 2058 ins_pipe( pipe_slow ); 2059 %} 2060 2061 // Integer could be loaded into xmm register directly from memory. 2062 instruct Repl2I_mem(vecD dst, memory mem) %{ 2063 predicate(n->as_Vector()->length() == 2); 2064 match(Set dst (ReplicateI (LoadVector mem))); 2065 format %{ "movd $dst,$mem\n\t" 2066 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2067 ins_encode %{ 2068 __ movdl($dst$$XMMRegister, $mem$$Address); 2069 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2070 %} 2071 ins_pipe( fpu_reg_reg ); 2072 %} 2073 2074 instruct Repl4I_mem(vecX dst, memory mem) %{ 2075 predicate(n->as_Vector()->length() == 4); 2076 match(Set dst (ReplicateI (LoadVector mem))); 2077 format %{ "movd $dst,$mem\n\t" 2078 "pshufd $dst,$dst,0x00\t! 
replicate4I" %} 2079 ins_encode %{ 2080 __ movdl($dst$$XMMRegister, $mem$$Address); 2081 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2082 %} 2083 ins_pipe( pipe_slow ); 2084 %} 2085 2086 instruct Repl8I_mem(vecY dst, memory mem) %{ 2087 predicate(n->as_Vector()->length() == 8); 2088 match(Set dst (ReplicateI (LoadVector mem))); 2089 format %{ "movd $dst,$mem\n\t" 2090 "pshufd $dst,$dst,0x00\n\t" 2091 "vinsertf128h $dst,$dst,$dst\t! replicate8I" %} 2092 ins_encode %{ 2093 __ movdl($dst$$XMMRegister, $mem$$Address); 2094 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2095 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2096 %} 2097 ins_pipe( pipe_slow ); 2098 %} 2099 2100 // Replicate integer (4 byte) scalar zero to be vector 2101 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 2102 predicate(n->as_Vector()->length() == 2); 2103 match(Set dst (ReplicateI zero)); 2104 format %{ "pxor $dst,$dst\t! replicate2I" %} 2105 ins_encode %{ 2106 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2107 %} 2108 ins_pipe( fpu_reg_reg ); 2109 %} 2110 2111 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 2112 predicate(n->as_Vector()->length() == 4); 2113 match(Set dst (ReplicateI zero)); 2114 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 2115 ins_encode %{ 2116 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2117 %} 2118 ins_pipe( fpu_reg_reg ); 2119 %} 2120 2121 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 2122 predicate(n->as_Vector()->length() == 8); 2123 match(Set dst (ReplicateI zero)); 2124 format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %} 2125 ins_encode %{ 2126 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2127 bool vector256 = true; 2128 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2129 %} 2130 ins_pipe( fpu_reg_reg ); 2131 %} 2132 2133 // Replicate long (8 byte) scalar to be vector 2134 #ifdef _LP64 2135 instruct Repl2L(vecX dst, rRegL src) %{ 2136 predicate(n->as_Vector()->length() == 2); 2137 match(Set dst (ReplicateL src)); 2138 format %{ "movdq $dst,$src\n\t" 2139 "movlhps $dst,$dst\t! replicate2L" %} 2140 ins_encode %{ 2141 __ movdq($dst$$XMMRegister, $src$$Register); 2142 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2143 %} 2144 ins_pipe( pipe_slow ); 2145 %} 2146 2147 instruct Repl4L(vecY dst, rRegL src) %{ 2148 predicate(n->as_Vector()->length() == 4); 2149 match(Set dst (ReplicateL src)); 2150 format %{ "movdq $dst,$src\n\t" 2151 "movlhps $dst,$dst\n\t" 2152 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} 2153 ins_encode %{ 2154 __ movdq($dst$$XMMRegister, $src$$Register); 2155 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2156 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2157 %} 2158 ins_pipe( pipe_slow ); 2159 %} 2160 #else // _LP64 2161 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 2162 predicate(n->as_Vector()->length() == 2); 2163 match(Set dst (ReplicateL src)); 2164 effect(TEMP dst, USE src, TEMP tmp); 2165 format %{ "movdl $dst,$src.lo\n\t" 2166 "movdl $tmp,$src.hi\n\t" 2167 "punpckldq $dst,$tmp\n\t" 2168 "movlhps $dst,$dst\t! 
replicate2L"%} 2169 ins_encode %{ 2170 __ movdl($dst$$XMMRegister, $src$$Register); 2171 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2172 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2173 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2174 %} 2175 ins_pipe( pipe_slow ); 2176 %} 2177 2178 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2179 predicate(n->as_Vector()->length() == 4); 2180 match(Set dst (ReplicateL src)); 2181 effect(TEMP dst, USE src, TEMP tmp); 2182 format %{ "movdl $dst,$src.lo\n\t" 2183 "movdl $tmp,$src.hi\n\t" 2184 "punpckldq $dst,$tmp\n\t" 2185 "movlhps $dst,$dst\n\t" 2186 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} 2187 ins_encode %{ 2188 __ movdl($dst$$XMMRegister, $src$$Register); 2189 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2190 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2191 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2192 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2193 %} 2194 ins_pipe( pipe_slow ); 2195 %} 2196 #endif // _LP64 2197 2198 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2199 instruct Repl2L_imm(vecX dst, immL con) %{ 2200 predicate(n->as_Vector()->length() == 2); 2201 match(Set dst (ReplicateL con)); 2202 format %{ "movsd $dst,[$constantaddress]\t! replicate2L($con)\n\t" 2203 "movlhps $dst,$dst" %} 2204 ins_encode %{ 2205 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 2206 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2207 %} 2208 ins_pipe( pipe_slow ); 2209 %} 2210 2211 instruct Repl4L_imm(vecY dst, immL con) %{ 2212 predicate(n->as_Vector()->length() == 4); 2213 match(Set dst (ReplicateL con)); 2214 format %{ "movsd $dst,[$constantaddress]\t! 
replicate4L($con)\n\t" 2215 "movlhps $dst,$dst\n\t" 2216 "vinsertf128h $dst,$dst,$dst" %} 2217 ins_encode %{ 2218 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 2219 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2220 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2221 %} 2222 ins_pipe( pipe_slow ); 2223 %} 2224 2225 // Long could be loaded into xmm register directly from memory. 2226 instruct Repl2L_mem(vecX dst, memory mem) %{ 2227 predicate(n->as_Vector()->length() == 2); 2228 match(Set dst (ReplicateL (LoadVector mem))); 2229 format %{ "movq $dst,$mem\n\t" 2230 "movlhps $dst,$dst\t! replicate2L" %} 2231 ins_encode %{ 2232 __ movq($dst$$XMMRegister, $mem$$Address); 2233 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2234 %} 2235 ins_pipe( pipe_slow ); 2236 %} 2237 2238 instruct Repl4L_mem(vecY dst, memory mem) %{ 2239 predicate(n->as_Vector()->length() == 4); 2240 match(Set dst (ReplicateL (LoadVector mem))); 2241 format %{ "movq $dst,$mem\n\t" 2242 "movlhps $dst,$dst\n\t" 2243 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} 2244 ins_encode %{ 2245 __ movq($dst$$XMMRegister, $mem$$Address); 2246 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 2247 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2248 %} 2249 ins_pipe( pipe_slow ); 2250 %} 2251 2252 // Replicate long (8 byte) scalar zero to be vector 2253 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 2254 predicate(n->as_Vector()->length() == 2); 2255 match(Set dst (ReplicateL zero)); 2256 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 2257 ins_encode %{ 2258 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2259 %} 2260 ins_pipe( fpu_reg_reg ); 2261 %} 2262 2263 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 2264 predicate(n->as_Vector()->length() == 4); 2265 match(Set dst (ReplicateL zero)); 2266 format %{ "vxorpd $dst,$dst,$dst\t! 
replicate4L zero" %} 2267 ins_encode %{ 2268 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2269 bool vector256 = true; 2270 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2271 %} 2272 ins_pipe( fpu_reg_reg ); 2273 %} 2274 2275 // Replicate float (4 byte) scalar to be vector 2276 instruct Repl2F(vecD dst, regF src) %{ 2277 predicate(n->as_Vector()->length() == 2); 2278 match(Set dst (ReplicateF src)); 2279 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 2280 ins_encode %{ 2281 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2282 %} 2283 ins_pipe( fpu_reg_reg ); 2284 %} 2285 2286 instruct Repl4F(vecX dst, regF src) %{ 2287 predicate(n->as_Vector()->length() == 4); 2288 match(Set dst (ReplicateF src)); 2289 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 2290 ins_encode %{ | 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX). 
68 // Linux ABI: No register preserved across function calls 69 // XMM0-XMM7 might hold parameters 70 // Windows ABI: XMM6-XMM15 preserved across function calls 71 // XMM0-XMM3 might hold parameters 72 73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 81 82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 90 91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 99 100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()->next(2)); 103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 108 109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 117 118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 126 127 #ifdef _WIN64 128 129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); 130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 136 reg_def XMM6h( SOC, SOE, 
Op_RegF, 6, xmm6->as_VMReg()->next(7)); 137 138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); 139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 146 147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); 148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 155 156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); 157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 164 165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); 166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 169 reg_def XMM10e( SOC, SOE, 
Op_RegF, 10, xmm10->as_VMReg()->next(4)); 170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 173 174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); 175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 182 183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); 184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 191 192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); 193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 200 201 reg_def XMM14 ( SOC, 
SOE, Op_RegF, 14, xmm14->as_VMReg()); 202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 209 210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); 211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 218 219 #else // _WIN64 220 221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 229 230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, 
xmm7->as_VMReg()->next(4)); 235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 238 239 #ifdef _LP64 240 241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 249 250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 258 259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 267 268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 269 reg_def 
XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 276 277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 285 286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 294 295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 300 
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 303 304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 312 313 #endif // _LP64 314 315 #endif // _WIN64 316 317 #ifdef _LP64 318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 319 #else 320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 321 #endif // _LP64 322 323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 331 #ifdef _LP64 872 873 format %{ "addss $dst, $src" %} 874 ins_cost(150); 875 ins_encode %{ 876 __ addss($dst$$XMMRegister, $src$$Address); 877 %} 878 ins_pipe(pipe_slow); 879 %} 880 881 instruct addF_imm(regF dst, immF con) %{ 882 predicate((UseSSE>=1) && (UseAVX == 0)); 883 match(Set dst (AddF dst con)); 884 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 885 ins_cost(150); 886 ins_encode %{ 887 __ addss($dst$$XMMRegister, $constantaddress($con)); 888 %} 889 
ins_pipe(pipe_slow); 890 %} 891 892 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 893 predicate(UseAVX > 0); 894 match(Set dst (AddF src1 src2)); 895 896 format %{ "vaddss $dst, $src1, $src2" %} 897 ins_cost(150); 898 ins_encode %{ 899 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 900 %} 901 ins_pipe(pipe_slow); 902 %} 903 904 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 905 predicate(UseAVX > 0); 906 match(Set dst (AddF src1 (LoadF src2))); 907 908 format %{ "vaddss $dst, $src1, $src2" %} 909 ins_cost(150); 910 ins_encode %{ 911 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 912 %} 913 ins_pipe(pipe_slow); 914 %} 915 916 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 917 predicate(UseAVX > 0); 918 match(Set dst (AddF src con)); 919 920 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 921 ins_cost(150); 922 ins_encode %{ 923 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 924 %} 925 ins_pipe(pipe_slow); 926 %} 927 928 instruct addD_reg(regD dst, regD src) %{ 929 predicate((UseSSE>=2) && (UseAVX == 0)); 930 match(Set dst (AddD dst src)); 931 932 format %{ "addsd $dst, $src" %} 933 ins_cost(150); 934 ins_encode %{ 935 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 936 %} 943 944 format %{ "addsd $dst, $src" %} 945 ins_cost(150); 946 ins_encode %{ 947 __ addsd($dst$$XMMRegister, $src$$Address); 948 %} 949 ins_pipe(pipe_slow); 950 %} 951 952 instruct addD_imm(regD dst, immD con) %{ 953 predicate((UseSSE>=2) && (UseAVX == 0)); 954 match(Set dst (AddD dst con)); 955 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 956 ins_cost(150); 957 ins_encode %{ 958 __ addsd($dst$$XMMRegister, $constantaddress($con)); 959 %} 960 ins_pipe(pipe_slow); 961 %} 962 963 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 964 predicate(UseAVX > 0); 965 match(Set dst (AddD src1 src2)); 966 967 format %{ 
"vaddsd $dst, $src1, $src2" %} 968 ins_cost(150); 969 ins_encode %{ 970 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 971 %} 972 ins_pipe(pipe_slow); 973 %} 974 975 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 976 predicate(UseAVX > 0); 977 match(Set dst (AddD src1 (LoadD src2))); 978 979 format %{ "vaddsd $dst, $src1, $src2" %} 980 ins_cost(150); 981 ins_encode %{ 982 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 983 %} 984 ins_pipe(pipe_slow); 985 %} 986 987 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 988 predicate(UseAVX > 0); 989 match(Set dst (AddD src con)); 990 991 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 992 ins_cost(150); 993 ins_encode %{ 994 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 995 %} 996 ins_pipe(pipe_slow); 997 %} 998 999 instruct subF_reg(regF dst, regF src) %{ 1000 predicate((UseSSE>=1) && (UseAVX == 0)); 1001 match(Set dst (SubF dst src)); 1002 1003 format %{ "subss $dst, $src" %} 1004 ins_cost(150); 1005 ins_encode %{ 1006 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1007 %} 1014 1015 format %{ "subss $dst, $src" %} 1016 ins_cost(150); 1017 ins_encode %{ 1018 __ subss($dst$$XMMRegister, $src$$Address); 1019 %} 1020 ins_pipe(pipe_slow); 1021 %} 1022 1023 instruct subF_imm(regF dst, immF con) %{ 1024 predicate((UseSSE>=1) && (UseAVX == 0)); 1025 match(Set dst (SubF dst con)); 1026 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1027 ins_cost(150); 1028 ins_encode %{ 1029 __ subss($dst$$XMMRegister, $constantaddress($con)); 1030 %} 1031 ins_pipe(pipe_slow); 1032 %} 1033 1034 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 1035 predicate(UseAVX > 0); 1036 match(Set dst (SubF src1 src2)); 1037 1038 format %{ "vsubss $dst, $src1, $src2" %} 1039 ins_cost(150); 1040 ins_encode %{ 1041 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, 
$src2$$XMMRegister); 1042 %} 1043 ins_pipe(pipe_slow); 1044 %} 1045 1046 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 1047 predicate(UseAVX > 0); 1048 match(Set dst (SubF src1 (LoadF src2))); 1049 1050 format %{ "vsubss $dst, $src1, $src2" %} 1051 ins_cost(150); 1052 ins_encode %{ 1053 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1054 %} 1055 ins_pipe(pipe_slow); 1056 %} 1057 1058 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 1059 predicate(UseAVX > 0); 1060 match(Set dst (SubF src con)); 1061 1062 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1063 ins_cost(150); 1064 ins_encode %{ 1065 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1066 %} 1067 ins_pipe(pipe_slow); 1068 %} 1069 1070 instruct subD_reg(regD dst, regD src) %{ 1071 predicate((UseSSE>=2) && (UseAVX == 0)); 1072 match(Set dst (SubD dst src)); 1073 1074 format %{ "subsd $dst, $src" %} 1075 ins_cost(150); 1076 ins_encode %{ 1077 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 1078 %} 1085 1086 format %{ "subsd $dst, $src" %} 1087 ins_cost(150); 1088 ins_encode %{ 1089 __ subsd($dst$$XMMRegister, $src$$Address); 1090 %} 1091 ins_pipe(pipe_slow); 1092 %} 1093 1094 instruct subD_imm(regD dst, immD con) %{ 1095 predicate((UseSSE>=2) && (UseAVX == 0)); 1096 match(Set dst (SubD dst con)); 1097 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1098 ins_cost(150); 1099 ins_encode %{ 1100 __ subsd($dst$$XMMRegister, $constantaddress($con)); 1101 %} 1102 ins_pipe(pipe_slow); 1103 %} 1104 1105 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 1106 predicate(UseAVX > 0); 1107 match(Set dst (SubD src1 src2)); 1108 1109 format %{ "vsubsd $dst, $src1, $src2" %} 1110 ins_cost(150); 1111 ins_encode %{ 1112 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1113 %} 1114 ins_pipe(pipe_slow); 1115 %} 1116 1117 instruct subD_reg_mem(regD dst, regD 
src1, memory src2) %{ 1118 predicate(UseAVX > 0); 1119 match(Set dst (SubD src1 (LoadD src2))); 1120 1121 format %{ "vsubsd $dst, $src1, $src2" %} 1122 ins_cost(150); 1123 ins_encode %{ 1124 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1125 %} 1126 ins_pipe(pipe_slow); 1127 %} 1128 1129 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 1130 predicate(UseAVX > 0); 1131 match(Set dst (SubD src con)); 1132 1133 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1134 ins_cost(150); 1135 ins_encode %{ 1136 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1137 %} 1138 ins_pipe(pipe_slow); 1139 %} 1140 1141 instruct mulF_reg(regF dst, regF src) %{ 1142 predicate((UseSSE>=1) && (UseAVX == 0)); 1143 match(Set dst (MulF dst src)); 1144 1145 format %{ "mulss $dst, $src" %} 1146 ins_cost(150); 1147 ins_encode %{ 1148 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 1149 %} 1156 1157 format %{ "mulss $dst, $src" %} 1158 ins_cost(150); 1159 ins_encode %{ 1160 __ mulss($dst$$XMMRegister, $src$$Address); 1161 %} 1162 ins_pipe(pipe_slow); 1163 %} 1164 1165 instruct mulF_imm(regF dst, immF con) %{ 1166 predicate((UseSSE>=1) && (UseAVX == 0)); 1167 match(Set dst (MulF dst con)); 1168 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1169 ins_cost(150); 1170 ins_encode %{ 1171 __ mulss($dst$$XMMRegister, $constantaddress($con)); 1172 %} 1173 ins_pipe(pipe_slow); 1174 %} 1175 1176 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 1177 predicate(UseAVX > 0); 1178 match(Set dst (MulF src1 src2)); 1179 1180 format %{ "vmulss $dst, $src1, $src2" %} 1181 ins_cost(150); 1182 ins_encode %{ 1183 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1184 %} 1185 ins_pipe(pipe_slow); 1186 %} 1187 1188 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 1189 predicate(UseAVX > 0); 1190 match(Set dst (MulF src1 (LoadF src2))); 1191 1192 
format %{ "vmulss $dst, $src1, $src2" %} 1193 ins_cost(150); 1194 ins_encode %{ 1195 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1196 %} 1197 ins_pipe(pipe_slow); 1198 %} 1199 1200 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 1201 predicate(UseAVX > 0); 1202 match(Set dst (MulF src con)); 1203 1204 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1205 ins_cost(150); 1206 ins_encode %{ 1207 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1208 %} 1209 ins_pipe(pipe_slow); 1210 %} 1211 1212 instruct mulD_reg(regD dst, regD src) %{ 1213 predicate((UseSSE>=2) && (UseAVX == 0)); 1214 match(Set dst (MulD dst src)); 1215 1216 format %{ "mulsd $dst, $src" %} 1217 ins_cost(150); 1218 ins_encode %{ 1219 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 1220 %} 1227 1228 format %{ "mulsd $dst, $src" %} 1229 ins_cost(150); 1230 ins_encode %{ 1231 __ mulsd($dst$$XMMRegister, $src$$Address); 1232 %} 1233 ins_pipe(pipe_slow); 1234 %} 1235 1236 instruct mulD_imm(regD dst, immD con) %{ 1237 predicate((UseSSE>=2) && (UseAVX == 0)); 1238 match(Set dst (MulD dst con)); 1239 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1240 ins_cost(150); 1241 ins_encode %{ 1242 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 1243 %} 1244 ins_pipe(pipe_slow); 1245 %} 1246 1247 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 1248 predicate(UseAVX > 0); 1249 match(Set dst (MulD src1 src2)); 1250 1251 format %{ "vmulsd $dst, $src1, $src2" %} 1252 ins_cost(150); 1253 ins_encode %{ 1254 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1255 %} 1256 ins_pipe(pipe_slow); 1257 %} 1258 1259 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 1260 predicate(UseAVX > 0); 1261 match(Set dst (MulD src1 (LoadD src2))); 1262 1263 format %{ "vmulsd $dst, $src1, $src2" %} 1264 ins_cost(150); 1265 ins_encode %{ 1266 __ 
vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1267 %} 1268 ins_pipe(pipe_slow); 1269 %} 1270 1271 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 1272 predicate(UseAVX > 0); 1273 match(Set dst (MulD src con)); 1274 1275 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1276 ins_cost(150); 1277 ins_encode %{ 1278 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1279 %} 1280 ins_pipe(pipe_slow); 1281 %} 1282 1283 instruct divF_reg(regF dst, regF src) %{ 1284 predicate((UseSSE>=1) && (UseAVX == 0)); 1285 match(Set dst (DivF dst src)); 1286 1287 format %{ "divss $dst, $src" %} 1288 ins_cost(150); 1289 ins_encode %{ 1290 __ divss($dst$$XMMRegister, $src$$XMMRegister); 1291 %} 1298 1299 format %{ "divss $dst, $src" %} 1300 ins_cost(150); 1301 ins_encode %{ 1302 __ divss($dst$$XMMRegister, $src$$Address); 1303 %} 1304 ins_pipe(pipe_slow); 1305 %} 1306 1307 instruct divF_imm(regF dst, immF con) %{ 1308 predicate((UseSSE>=1) && (UseAVX == 0)); 1309 match(Set dst (DivF dst con)); 1310 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1311 ins_cost(150); 1312 ins_encode %{ 1313 __ divss($dst$$XMMRegister, $constantaddress($con)); 1314 %} 1315 ins_pipe(pipe_slow); 1316 %} 1317 1318 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 1319 predicate(UseAVX > 0); 1320 match(Set dst (DivF src1 src2)); 1321 1322 format %{ "vdivss $dst, $src1, $src2" %} 1323 ins_cost(150); 1324 ins_encode %{ 1325 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1326 %} 1327 ins_pipe(pipe_slow); 1328 %} 1329 1330 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 1331 predicate(UseAVX > 0); 1332 match(Set dst (DivF src1 (LoadF src2))); 1333 1334 format %{ "vdivss $dst, $src1, $src2" %} 1335 ins_cost(150); 1336 ins_encode %{ 1337 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1338 %} 1339 ins_pipe(pipe_slow); 1340 %} 1341 
1342 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 1343 predicate(UseAVX > 0); 1344 match(Set dst (DivF src con)); 1345 1346 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1347 ins_cost(150); 1348 ins_encode %{ 1349 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1350 %} 1351 ins_pipe(pipe_slow); 1352 %} 1353 1354 instruct divD_reg(regD dst, regD src) %{ 1355 predicate((UseSSE>=2) && (UseAVX == 0)); 1356 match(Set dst (DivD dst src)); 1357 1358 format %{ "divsd $dst, $src" %} 1359 ins_cost(150); 1360 ins_encode %{ 1361 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 1362 %} 1369 1370 format %{ "divsd $dst, $src" %} 1371 ins_cost(150); 1372 ins_encode %{ 1373 __ divsd($dst$$XMMRegister, $src$$Address); 1374 %} 1375 ins_pipe(pipe_slow); 1376 %} 1377 1378 instruct divD_imm(regD dst, immD con) %{ 1379 predicate((UseSSE>=2) && (UseAVX == 0)); 1380 match(Set dst (DivD dst con)); 1381 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1382 ins_cost(150); 1383 ins_encode %{ 1384 __ divsd($dst$$XMMRegister, $constantaddress($con)); 1385 %} 1386 ins_pipe(pipe_slow); 1387 %} 1388 1389 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 1390 predicate(UseAVX > 0); 1391 match(Set dst (DivD src1 src2)); 1392 1393 format %{ "vdivsd $dst, $src1, $src2" %} 1394 ins_cost(150); 1395 ins_encode %{ 1396 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1397 %} 1398 ins_pipe(pipe_slow); 1399 %} 1400 1401 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 1402 predicate(UseAVX > 0); 1403 match(Set dst (DivD src1 (LoadD src2))); 1404 1405 format %{ "vdivsd $dst, $src1, $src2" %} 1406 ins_cost(150); 1407 ins_encode %{ 1408 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1409 %} 1410 ins_pipe(pipe_slow); 1411 %} 1412 1413 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 1414 predicate(UseAVX > 0); 1415 match(Set dst 
(DivD src con)); 1416 1417 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1418 ins_cost(150); 1419 ins_encode %{ 1420 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1421 %} 1422 ins_pipe(pipe_slow); 1423 %} 1424 1425 instruct absF_reg(regF dst) %{ 1426 predicate((UseSSE>=1) && (UseAVX == 0)); 1427 match(Set dst (AbsF dst)); 1428 ins_cost(150); 1429 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1430 ins_encode %{ 1431 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1432 %} 1433 ins_pipe(pipe_slow); 1434 %} 1435 1436 instruct absF_reg_reg(regF dst, regF src) %{ 1437 predicate(UseAVX > 0); 1438 match(Set dst (AbsF src)); 1439 ins_cost(150); 1440 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 1441 ins_encode %{ 1442 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1443 ExternalAddress(float_signmask())); 1444 %} 1445 ins_pipe(pipe_slow); 1446 %} 1447 1448 instruct absD_reg(regD dst) %{ 1449 predicate((UseSSE>=2) && (UseAVX == 0)); 1450 match(Set dst (AbsD dst)); 1451 ins_cost(150); 1452 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1453 "# abs double by sign masking" %} 1454 ins_encode %{ 1455 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1456 %} 1457 ins_pipe(pipe_slow); 1458 %} 1459 1460 instruct absD_reg_reg(regD dst, regD src) %{ 1461 predicate(UseAVX > 0); 1462 match(Set dst (AbsD src)); 1463 ins_cost(150); 1464 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1465 "# abs double by sign masking" %} 1466 ins_encode %{ 1467 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1468 ExternalAddress(double_signmask())); 1469 %} 1470 ins_pipe(pipe_slow); 1471 %} 1472 1473 instruct negF_reg(regF dst) %{ 1474 predicate((UseSSE>=1) && (UseAVX == 0)); 1475 match(Set dst (NegF dst)); 1476 ins_cost(150); 1477 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1478 ins_encode %{ 1479 __ 
xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1480 %} 1481 ins_pipe(pipe_slow); 1482 %} 1483 1484 instruct negF_reg_reg(regF dst, regF src) %{ 1485 predicate(UseAVX > 0); 1486 match(Set dst (NegF src)); 1487 ins_cost(150); 1488 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1489 ins_encode %{ 1490 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1491 ExternalAddress(float_signflip())); 1492 %} 1493 ins_pipe(pipe_slow); 1494 %} 1495 1496 instruct negD_reg(regD dst) %{ 1497 predicate((UseSSE>=2) && (UseAVX == 0)); 1498 match(Set dst (NegD dst)); 1499 ins_cost(150); 1500 format %{ "xorpd $dst, [0x8000000000000000]\t" 1501 "# neg double by sign flipping" %} 1502 ins_encode %{ 1503 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1504 %} 1505 ins_pipe(pipe_slow); 1506 %} 1507 1508 instruct negD_reg_reg(regD dst, regD src) %{ 1509 predicate(UseAVX > 0); 1510 match(Set dst (NegD src)); 1511 ins_cost(150); 1512 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1513 "# neg double by sign flipping" %} 1514 ins_encode %{ 1515 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1516 ExternalAddress(double_signflip())); 1517 %} 1518 ins_pipe(pipe_slow); 1519 %} 1520 1521 instruct sqrtF_reg(regF dst, regF src) %{ 1522 predicate(UseSSE>=1); 1523 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1524 1525 format %{ "sqrtss $dst, $src" %} 1526 ins_cost(150); 1527 ins_encode %{ 1528 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1702 instruct Repl8B(vecD dst, rRegI src) %{ 1703 predicate(n->as_Vector()->length() == 8); 1704 match(Set dst (ReplicateB src)); 1705 format %{ "movd $dst,$src\n\t" 1706 "punpcklbw $dst,$dst\n\t" 1707 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 1708 ins_encode %{ 1709 __ movdl($dst$$XMMRegister, $src$$Register); 1710 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1711 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1712 %} 1713 ins_pipe( pipe_slow ); 1714 %} 1715 1716 instruct Repl16B(vecX dst, rRegI src) %{ 1717 predicate(n->as_Vector()->length() == 16); 1718 match(Set dst (ReplicateB src)); 1719 format %{ "movd $dst,$src\n\t" 1720 "punpcklbw $dst,$dst\n\t" 1721 "pshuflw $dst,$dst,0x00\n\t" 1722 "punpcklqdq $dst,$dst\t! replicate16B" %} 1723 ins_encode %{ 1724 __ movdl($dst$$XMMRegister, $src$$Register); 1725 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1726 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1727 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1728 %} 1729 ins_pipe( pipe_slow ); 1730 %} 1731 1732 instruct Repl32B(vecY dst, rRegI src) %{ 1733 predicate(n->as_Vector()->length() == 32); 1734 match(Set dst (ReplicateB src)); 1735 format %{ "movd $dst,$src\n\t" 1736 "punpcklbw $dst,$dst\n\t" 1737 "pshuflw $dst,$dst,0x00\n\t" 1738 "punpcklqdq $dst,$dst\n\t" 1739 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 1740 ins_encode %{ 1741 __ movdl($dst$$XMMRegister, $src$$Register); 1742 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1743 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1744 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1745 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1746 %} 1747 ins_pipe( pipe_slow ); 1748 %} 1749 1750 // Replicate byte scalar immediate to be vector by loading from const table. 1751 instruct Repl4B_imm(vecS dst, immI con) %{ 1752 predicate(n->as_Vector()->length() == 4); 1753 match(Set dst (ReplicateB con)); 1754 format %{ "movdl $dst,[$constantaddress]\t! 
replicate4B($con)" %} 1755 ins_encode %{ 1756 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1757 %} 1758 ins_pipe( pipe_slow ); 1759 %} 1760 1761
// Broadcast a byte immediate into a 64-bit (8-byte) vector: the 8x-replicated
// constant is materialized in the constant table and loaded with movq.
instruct Repl8B_imm(vecD dst, immI con) %{ 1762 predicate(n->as_Vector()->length() == 8); 1763 match(Set dst (ReplicateB con)); 1764 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 1765 ins_encode %{ 1766 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1767 %} 1768 ins_pipe( pipe_slow ); 1769 %} 1770 1771
// Broadcast a byte immediate into a 128-bit (16-byte) vector: load the 8-byte
// replicated pattern, then duplicate the low quadword with punpcklqdq.
instruct Repl16B_imm(vecX dst, immI con) %{ 1772 predicate(n->as_Vector()->length() == 16); 1773 match(Set dst (ReplicateB con)); 1774 format %{ "movq $dst,[$constantaddress]\n\t" 1775 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 1776 ins_encode %{ 1777 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1778 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1779 %} 1780 ins_pipe( pipe_slow ); 1781 %} 1782 1783
// Broadcast a byte immediate into a 256-bit (32-byte) vector: build the low
// 128 bits as in Repl16B_imm, then mirror them into the upper lane with
// vinserti128h. Fixed typo in the format string: "lreplicate32B" -> "replicate32B"
// (matches the replicateNB($con) naming of the sibling instructs).
instruct Repl32B_imm(vecY dst, immI con) %{ 1784 predicate(n->as_Vector()->length() == 32); 1785 match(Set dst (ReplicateB con)); 1786 format %{ "movq $dst,[$constantaddress]\n\t" 1787 "punpcklqdq $dst,$dst\n\t" 1788 "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %} 1789 ins_encode %{ 1790 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1791 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1792 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1793 %} 1794 ins_pipe( pipe_slow ); 1795 %} 1796 1797 // Replicate byte scalar zero to be vector 1798 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 1799 predicate(n->as_Vector()->length() == 4); 1800 match(Set dst (ReplicateB zero)); 1801 format %{ "pxor $dst,$dst\t!
replicate4B zero" %} 1802 ins_encode %{ 1803 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1804 %} 1805 ins_pipe( fpu_reg_reg ); 1806 %} 1807 1808 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 1809 predicate(n->as_Vector()->length() == 8); 1810 match(Set dst (ReplicateB zero)); 1811 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 1812 ins_encode %{ 1813 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1814 %} 1815 ins_pipe( fpu_reg_reg ); 1816 %} 1817 1818 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1819 predicate(n->as_Vector()->length() == 16); 1820 match(Set dst (ReplicateB zero)); 1821 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 1822 ins_encode %{ 1823 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1824 %} 1825 ins_pipe( fpu_reg_reg ); 1826 %} 1827 1828 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1829 predicate(n->as_Vector()->length() == 32); 1830 match(Set dst (ReplicateB zero)); 1831 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 1832 ins_encode %{ 1833 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 1834 bool vector256 = true; 1835 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1836 %} 1837 ins_pipe( fpu_reg_reg ); 1838 %} 1839 1840 // Replicate char/short (2 byte) scalar to be vector 1841 instruct Repl2S(vecS dst, rRegI src) %{ 1842 predicate(n->as_Vector()->length() == 2); 1843 match(Set dst (ReplicateS src)); 1844 format %{ "movd $dst,$src\n\t" 1845 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1846 ins_encode %{ 1847 __ movdl($dst$$XMMRegister, $src$$Register); 1848 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1849 %} 1850 ins_pipe( fpu_reg_reg ); 1851 %} 1852 1853 instruct Repl4S(vecD dst, rRegI src) %{ 1854 predicate(n->as_Vector()->length() == 4); 1855 match(Set dst (ReplicateS src)); 1856 format %{ "movd $dst,$src\n\t" 1857 "pshuflw $dst,$dst,0x00\t! 
replicate4S" %} 1858 ins_encode %{ 1859 __ movdl($dst$$XMMRegister, $src$$Register); 1860 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1861 %} 1862 ins_pipe( fpu_reg_reg ); 1863 %} 1864 1865 instruct Repl8S(vecX dst, rRegI src) %{ 1866 predicate(n->as_Vector()->length() == 8); 1867 match(Set dst (ReplicateS src)); 1868 format %{ "movd $dst,$src\n\t" 1869 "pshuflw $dst,$dst,0x00\n\t" 1870 "punpcklqdq $dst,$dst\t! replicate8S" %} 1871 ins_encode %{ 1872 __ movdl($dst$$XMMRegister, $src$$Register); 1873 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1874 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1875 %} 1876 ins_pipe( pipe_slow ); 1877 %} 1878 1879 instruct Repl16S(vecY dst, rRegI src) %{ 1880 predicate(n->as_Vector()->length() == 16); 1881 match(Set dst (ReplicateS src)); 1882 format %{ "movd $dst,$src\n\t" 1883 "pshuflw $dst,$dst,0x00\n\t" 1884 "punpcklqdq $dst,$dst\n\t" 1885 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 1886 ins_encode %{ 1887 __ movdl($dst$$XMMRegister, $src$$Register); 1888 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1889 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1890 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1891 %} 1892 ins_pipe( pipe_slow ); 1893 %} 1894 1895 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 1896 instruct Repl2S_imm(vecS dst, immI con) %{ 1897 predicate(n->as_Vector()->length() == 2); 1898 match(Set dst (ReplicateS con)); 1899 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 1900 ins_encode %{ 1901 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 1902 %} 1903 ins_pipe( fpu_reg_reg ); 1904 %} 1905 1906 instruct Repl4S_imm(vecD dst, immI con) %{ 1907 predicate(n->as_Vector()->length() == 4); 1908 match(Set dst (ReplicateS con)); 1909 format %{ "movq $dst,[$constantaddress]\t! 
replicate4S($con)" %} 1910 ins_encode %{ 1911 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1912 %} 1913 ins_pipe( fpu_reg_reg ); 1914 %} 1915 1916 instruct Repl8S_imm(vecX dst, immI con) %{ 1917 predicate(n->as_Vector()->length() == 8); 1918 match(Set dst (ReplicateS con)); 1919 format %{ "movq $dst,[$constantaddress]\n\t" 1920 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 1921 ins_encode %{ 1922 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1923 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1924 %} 1925 ins_pipe( pipe_slow ); 1926 %} 1927 1928 instruct Repl16S_imm(vecY dst, immI con) %{ 1929 predicate(n->as_Vector()->length() == 16); 1930 match(Set dst (ReplicateS con)); 1931 format %{ "movq $dst,[$constantaddress]\n\t" 1932 "punpcklqdq $dst,$dst\n\t" 1933 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} 1934 ins_encode %{ 1935 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1936 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1937 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1938 %} 1939 ins_pipe( pipe_slow ); 1940 %} 1941 1942 // Replicate char/short (2 byte) scalar zero to be vector 1943 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 1944 predicate(n->as_Vector()->length() == 2); 1945 match(Set dst (ReplicateS zero)); 1946 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 1947 ins_encode %{ 1948 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1949 %} 1950 ins_pipe( fpu_reg_reg ); 1951 %} 1952 1953 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 1954 predicate(n->as_Vector()->length() == 4); 1955 match(Set dst (ReplicateS zero)); 1956 format %{ "pxor $dst,$dst\t! 
replicate4S zero" %} 1957 ins_encode %{ 1958 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1959 %} 1960 ins_pipe( fpu_reg_reg ); 1961 %} 1962 1963 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 1964 predicate(n->as_Vector()->length() == 8); 1965 match(Set dst (ReplicateS zero)); 1966 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 1967 ins_encode %{ 1968 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1969 %} 1970 ins_pipe( fpu_reg_reg ); 1971 %} 1972 1973 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 1974 predicate(n->as_Vector()->length() == 16); 1975 match(Set dst (ReplicateS zero)); 1976 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 1977 ins_encode %{ 1978 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 1979 bool vector256 = true; 1980 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1981 %} 1982 ins_pipe( fpu_reg_reg ); 1983 %} 1984 1985 // Replicate integer (4 byte) scalar to be vector 1986 instruct Repl2I(vecD dst, rRegI src) %{ 1987 predicate(n->as_Vector()->length() == 2); 1988 match(Set dst (ReplicateI src)); 1989 format %{ "movd $dst,$src\n\t" 1990 "pshufd $dst,$dst,0x00\t! replicate2I" %} 1991 ins_encode %{ 1992 __ movdl($dst$$XMMRegister, $src$$Register); 1993 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1994 %} 1995 ins_pipe( fpu_reg_reg ); 1996 %} 1997 1998 instruct Repl4I(vecX dst, rRegI src) %{ 1999 predicate(n->as_Vector()->length() == 4); 2000 match(Set dst (ReplicateI src)); 2001 format %{ "movd $dst,$src\n\t" 2002 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2003 ins_encode %{ 2004 __ movdl($dst$$XMMRegister, $src$$Register); 2005 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2006 %} 2007 ins_pipe( pipe_slow ); 2008 %} 2009 2010 instruct Repl8I(vecY dst, rRegI src) %{ 2011 predicate(n->as_Vector()->length() == 8); 2012 match(Set dst (ReplicateI src)); 2013 format %{ "movd $dst,$src\n\t" 2014 "pshufd $dst,$dst,0x00\n\t" 2015 "vinserti128h $dst,$dst,$dst\t! 
replicate8I" %} 2016 ins_encode %{ 2017 __ movdl($dst$$XMMRegister, $src$$Register); 2018 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2019 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2020 %} 2021 ins_pipe( pipe_slow ); 2022 %} 2023 2024 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2025 instruct Repl2I_imm(vecD dst, immI con) %{ 2026 predicate(n->as_Vector()->length() == 2); 2027 match(Set dst (ReplicateI con)); 2028 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 2029 ins_encode %{ 2030 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2031 %} 2032 ins_pipe( fpu_reg_reg ); 2033 %} 2034 2035 instruct Repl4I_imm(vecX dst, immI con) %{ 2036 predicate(n->as_Vector()->length() == 4); 2037 match(Set dst (ReplicateI con)); 2038 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2039 "punpcklqdq $dst,$dst" %} 2040 ins_encode %{ 2041 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2042 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2043 %} 2044 ins_pipe( pipe_slow ); 2045 %} 2046 2047 instruct Repl8I_imm(vecY dst, immI con) %{ 2048 predicate(n->as_Vector()->length() == 8); 2049 match(Set dst (ReplicateI con)); 2050 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2051 "punpcklqdq $dst,$dst\n\t" 2052 "vinserti128h $dst,$dst,$dst" %} 2053 ins_encode %{ 2054 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2055 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2056 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2057 %} 2058 ins_pipe( pipe_slow ); 2059 %} 2060 2061 // Integer could be loaded into xmm register directly from memory. 
2062 instruct Repl2I_mem(vecD dst, memory mem) %{ 2063 predicate(n->as_Vector()->length() == 2); 2064 match(Set dst (ReplicateI (LoadI mem))); 2065 format %{ "movd $dst,$mem\n\t" 2066 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2067 ins_encode %{ 2068 __ movdl($dst$$XMMRegister, $mem$$Address); 2069 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2070 %} 2071 ins_pipe( fpu_reg_reg ); 2072 %} 2073 2074 instruct Repl4I_mem(vecX dst, memory mem) %{ 2075 predicate(n->as_Vector()->length() == 4); 2076 match(Set dst (ReplicateI (LoadI mem))); 2077 format %{ "movd $dst,$mem\n\t" 2078 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2079 ins_encode %{ 2080 __ movdl($dst$$XMMRegister, $mem$$Address); 2081 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2082 %} 2083 ins_pipe( pipe_slow ); 2084 %} 2085 2086 instruct Repl8I_mem(vecY dst, memory mem) %{ 2087 predicate(n->as_Vector()->length() == 8); 2088 match(Set dst (ReplicateI (LoadI mem))); 2089 format %{ "movd $dst,$mem\n\t" 2090 "pshufd $dst,$dst,0x00\n\t" 2091 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2092 ins_encode %{ 2093 __ movdl($dst$$XMMRegister, $mem$$Address); 2094 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2095 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2096 %} 2097 ins_pipe( pipe_slow ); 2098 %} 2099 2100 // Replicate integer (4 byte) scalar zero to be vector 2101 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 2102 predicate(n->as_Vector()->length() == 2); 2103 match(Set dst (ReplicateI zero)); 2104 format %{ "pxor $dst,$dst\t! replicate2I" %} 2105 ins_encode %{ 2106 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2107 %} 2108 ins_pipe( fpu_reg_reg ); 2109 %} 2110 2111 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 2112 predicate(n->as_Vector()->length() == 4); 2113 match(Set dst (ReplicateI zero)); 2114 format %{ "pxor $dst,$dst\t! 
replicate4I zero)" %} 2115 ins_encode %{ 2116 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2117 %} 2118 ins_pipe( fpu_reg_reg ); 2119 %} 2120 2121 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 2122 predicate(n->as_Vector()->length() == 8); 2123 match(Set dst (ReplicateI zero)); 2124 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 2125 ins_encode %{ 2126 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 2127 bool vector256 = true; 2128 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2129 %} 2130 ins_pipe( fpu_reg_reg ); 2131 %} 2132 2133 // Replicate long (8 byte) scalar to be vector 2134 #ifdef _LP64 2135 instruct Repl2L(vecX dst, rRegL src) %{ 2136 predicate(n->as_Vector()->length() == 2); 2137 match(Set dst (ReplicateL src)); 2138 format %{ "movdq $dst,$src\n\t" 2139 "punpcklqdq $dst,$dst\t! replicate2L" %} 2140 ins_encode %{ 2141 __ movdq($dst$$XMMRegister, $src$$Register); 2142 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2143 %} 2144 ins_pipe( pipe_slow ); 2145 %} 2146 2147 instruct Repl4L(vecY dst, rRegL src) %{ 2148 predicate(n->as_Vector()->length() == 4); 2149 match(Set dst (ReplicateL src)); 2150 format %{ "movdq $dst,$src\n\t" 2151 "punpcklqdq $dst,$dst\n\t" 2152 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2153 ins_encode %{ 2154 __ movdq($dst$$XMMRegister, $src$$Register); 2155 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2156 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2157 %} 2158 ins_pipe( pipe_slow ); 2159 %} 2160 #else // _LP64 2161 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 2162 predicate(n->as_Vector()->length() == 2); 2163 match(Set dst (ReplicateL src)); 2164 effect(TEMP dst, USE src, TEMP tmp); 2165 format %{ "movdl $dst,$src.lo\n\t" 2166 "movdl $tmp,$src.hi\n\t" 2167 "punpckldq $dst,$tmp\n\t" 2168 "punpcklqdq $dst,$dst\t! 
replicate2L"%} 2169 ins_encode %{ 2170 __ movdl($dst$$XMMRegister, $src$$Register); 2171 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2172 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2173 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2174 %} 2175 ins_pipe( pipe_slow ); 2176 %} 2177 2178 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2179 predicate(n->as_Vector()->length() == 4); 2180 match(Set dst (ReplicateL src)); 2181 effect(TEMP dst, USE src, TEMP tmp); 2182 format %{ "movdl $dst,$src.lo\n\t" 2183 "movdl $tmp,$src.hi\n\t" 2184 "punpckldq $dst,$tmp\n\t" 2185 "punpcklqdq $dst,$dst\n\t" 2186 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2187 ins_encode %{ 2188 __ movdl($dst$$XMMRegister, $src$$Register); 2189 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2190 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2191 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2192 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2193 %} 2194 ins_pipe( pipe_slow ); 2195 %} 2196 #endif // _LP64 2197 2198 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2199 instruct Repl2L_imm(vecX dst, immL con) %{ 2200 predicate(n->as_Vector()->length() == 2); 2201 match(Set dst (ReplicateL con)); 2202 format %{ "movq $dst,[$constantaddress]\n\t" 2203 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 2204 ins_encode %{ 2205 __ movq($dst$$XMMRegister, $constantaddress($con)); 2206 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2207 %} 2208 ins_pipe( pipe_slow ); 2209 %} 2210 2211 instruct Repl4L_imm(vecY dst, immL con) %{ 2212 predicate(n->as_Vector()->length() == 4); 2213 match(Set dst (ReplicateL con)); 2214 format %{ "movq $dst,[$constantaddress]\n\t" 2215 "punpcklqdq $dst,$dst\n\t" 2216 "vinserti128h $dst,$dst,$dst\t! 
replicate4L($con)" %} 2217 ins_encode %{ 2218 __ movq($dst$$XMMRegister, $constantaddress($con)); 2219 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2220 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2221 %} 2222 ins_pipe( pipe_slow ); 2223 %} 2224 2225 // Long could be loaded into xmm register directly from memory. 2226 instruct Repl2L_mem(vecX dst, memory mem) %{ 2227 predicate(n->as_Vector()->length() == 2); 2228 match(Set dst (ReplicateL (LoadL mem))); 2229 format %{ "movq $dst,$mem\n\t" 2230 "punpcklqdq $dst,$dst\t! replicate2L" %} 2231 ins_encode %{ 2232 __ movq($dst$$XMMRegister, $mem$$Address); 2233 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2234 %} 2235 ins_pipe( pipe_slow ); 2236 %} 2237 2238 instruct Repl4L_mem(vecY dst, memory mem) %{ 2239 predicate(n->as_Vector()->length() == 4); 2240 match(Set dst (ReplicateL (LoadL mem))); 2241 format %{ "movq $dst,$mem\n\t" 2242 "punpcklqdq $dst,$dst\n\t" 2243 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2244 ins_encode %{ 2245 __ movq($dst$$XMMRegister, $mem$$Address); 2246 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2247 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2248 %} 2249 ins_pipe( pipe_slow ); 2250 %} 2251 2252 // Replicate long (8 byte) scalar zero to be vector 2253 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 2254 predicate(n->as_Vector()->length() == 2); 2255 match(Set dst (ReplicateL zero)); 2256 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 2257 ins_encode %{ 2258 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2259 %} 2260 ins_pipe( fpu_reg_reg ); 2261 %} 2262 2263 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 2264 predicate(n->as_Vector()->length() == 4); 2265 match(Set dst (ReplicateL zero)); 2266 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 2267 ins_encode %{ 2268 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2269 bool vector256 = true; 2270 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2271 %} 2272 ins_pipe( fpu_reg_reg ); 2273 %} 2274 2275 // Replicate float (4 byte) scalar to be vector 2276 instruct Repl2F(vecD dst, regF src) %{ 2277 predicate(n->as_Vector()->length() == 2); 2278 match(Set dst (ReplicateF src)); 2279 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 2280 ins_encode %{ 2281 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2282 %} 2283 ins_pipe( fpu_reg_reg ); 2284 %} 2285 2286 instruct Repl4F(vecX dst, regF src) %{ 2287 predicate(n->as_Vector()->length() == 4); 2288 match(Set dst (ReplicateF src)); 2289 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 2290 ins_encode %{ |