  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
    case Op_MulAddVS2VI:
      if (UseSSE < 2)
        ret_value = false;
      break;
#ifdef _LP64
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
#endif
  }

  return ret_value;  // By default match rules are supported.
}
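
// For intuition, a worked example (illustrative only, not part of the build):
// on a CPU reporting UseAVX == 2 without AVX-512DQ, the gates above yield
//
//   match_rule_supported(Op_SqrtVD)  -> true   // any AVX level suffices
//   match_rule_supported(Op_MulVL)   -> false  // requires avx512dq
//   match_rule_supported(Op_CMoveVF) -> true   // only in the AVX1/AVX2 window
//
// C2 consults this hook during matching, so returning false here keeps the
// corresponding ideal node from being selected on this platform.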
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {

// ...

  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );

// ...

  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
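//
// Note (explanatory): a variable shift count reaches the vector shift rules
// below through LShiftCntV/RShiftCntV.  The scalar count is moved into the
// low doubleword of an xmm register, and the packed shift instructions read
// only those low bits, so every lane is shifted by the same amount.  E.g. for
// "a[i] << s" with a variable s (registers illustrative):
//
//   movd  xmm1, ecx      ; shift-count rule: s -> low 32 bits of xmm1
//   psllw xmm0, xmm1     ; every 16-bit lane shifted by the same count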
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;

// ...

  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
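
// Rule-selection note (explanatory): each vector size comes in matching
// flavors.  The *_imm forms apply when the shift count is a compile-time
// constant (immI8), so the count is encoded directly into the instruction;
// the register forms consume a count loaded by the vshiftcnt rule above.
// Illustrative outcome for packed shorts:
//
//   v << 3   (constant)  ->  psllw xmm0, 3
//   v << s   (variable)  ->  movd xmm1, esi ; psllw xmm0, xmm1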
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
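
// Encoding note (explanatory): in the AVX encodings, vector_len selects the
// operand width: 0 -> 128-bit (xmm), 1 -> 256-bit (ymm), 2 -> 512-bit
// (zmm/EVEX).  E.g. the packed16S rules that follow pass vector_len = 1:
//
//   __ vpsllw(dst, src, shift, 1);   // shifts 16 shorts in a ymm register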
instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
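//
// Worked example (explanatory) of the statement above: for a negative short
// element s = -4 (0xFFFC), Java evaluates (short)(s >>> 3) on the
// sign-extended int value:
//
//   0xFFFFFFFC >>> 3 = 0x1FFFFFFF, truncated to short -> 0xFFFF = -1
//
// whereas a 16-bit psrlw would compute 0xFFFC >>> 3 = 0x1FFF = 8191.  For
// char (zero-extended, unsigned) both routes agree, which is why URShiftVS
// remains vectorizable for char data.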
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
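//
// Note (explanatory counterpart to the logical-shift caveat above): the
// arithmetic right shift IS safe to vectorize for shorts, because
// sign-extension commutes with it.  E.g. s = -4: Java computes
// (short)(((int)s) >> 3) = -1, and a 16-bit psraw on 0xFFFC also yields
// 0xFFFF = -1, so the per-lane instruction matches the scalar semantics.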
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.


// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{

// ...

%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c

// ...

static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_float_signmask()  { return StubRoutines::x86::vector_float_sign_mask(); }
static address vector_float_signflip()  { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
static address vector_byte_perm_mask()  { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask()  { return StubRoutines::x86::vector_long_sign_mask(); }

//=============================================================================

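// Note (explanatory; the exact constant is an assumption inferred from how it
// is used by the shift emitters below): vector_short_to_byte_mask() points at
// a stub constant of 0x00ff words.  ANDing a word vector with it clears the
// high byte of every 16-bit lane, which is what lets packuswb repack shifted
// words into bytes without saturation artifacts:
//
//   __ pand(dst, tmp);       // tmp holds shifted words; keep low byte per lane
//   __ packuswb(dst, dst);   // words -> bytes
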
typedef void (MacroAssembler::*XX_Inst)(XMMRegister, XMMRegister);
typedef void (MacroAssembler::*XAR_Inst)(XMMRegister, AddressLiteral, Register);
typedef void (MacroAssembler::*XXI_Inst)(XMMRegister, XMMRegister, int);
typedef void (MacroAssembler::*XXAIR_Inst)(XMMRegister, XMMRegister, AddressLiteral, int, Register);
typedef void (MacroAssembler::*XXXI_Inst)(XMMRegister, XMMRegister, XMMRegister, int);

XX_Inst get_xx_inst(int opcode) {
  switch (opcode) {
    case Op_RShiftVB:
    case Op_RShiftVS:
      return &MacroAssembler::psraw;
    case Op_LShiftVB:
    case Op_LShiftVS:
      return &MacroAssembler::psllw;
    case Op_URShiftVB:
    case Op_URShiftVS:
      return &MacroAssembler::psrlw;
    case Op_RShiftVI:
      return &MacroAssembler::psrad;
    case Op_LShiftVI:
      return &MacroAssembler::pslld;
    case Op_URShiftVI:
      return &MacroAssembler::psrld;
    case Op_LShiftVL:
      return &MacroAssembler::psllq;
    case Op_RShiftVL:
    case Op_URShiftVL:
      return &MacroAssembler::psrlq;
    default:
      return NULL;
  }
}

XAR_Inst get_xar_inst(int opcode) {
  switch (opcode) {
    case Op_AbsVF:
      return &MacroAssembler::andps;
    case Op_AbsVD:
      return &MacroAssembler::andpd;
    case Op_NegVF:
      return &MacroAssembler::xorps;
    case Op_NegVD:
      return &MacroAssembler::xorpd;
    default:
      return NULL;
  }
}

XXAIR_Inst get_xxair_inst(int opcode) {
  switch (opcode) {
    case Op_AbsVF:
      return &MacroAssembler::vandps;
    case Op_AbsVD:
      return &MacroAssembler::vandpd;
    case Op_NegVF:
      return &MacroAssembler::vxorps;
    case Op_NegVD:
      return &MacroAssembler::vxorpd;
    default:
      return NULL;
  }
}

XXXI_Inst get_xxxi_inst(int opcode) {
  switch (opcode) {
    case Op_RShiftVB:
    case Op_RShiftVS:
      return &MacroAssembler::vpsraw;
    case Op_LShiftVB:
    case Op_LShiftVS:
      return &MacroAssembler::vpsllw;
    case Op_URShiftVB:
    case Op_URShiftVS:
      return &MacroAssembler::vpsrlw;
    case Op_RShiftVI:
      return &MacroAssembler::vpsrad;
    case Op_LShiftVI:
      return &MacroAssembler::vpslld;
    case Op_URShiftVI:
      return &MacroAssembler::vpsrld;
    case Op_RShiftVL:
      return &MacroAssembler::evpsraq;
    case Op_LShiftVL:
      return &MacroAssembler::vpsllq;
    case Op_URShiftVL:
      return &MacroAssembler::vpsrlq;
    default:
      return NULL;
  }
}

XX_Inst get_extend_inst(bool sign) {
  return sign ? &MacroAssembler::pmovsxbw
              : &MacroAssembler::pmovzxbw;
}

XXI_Inst get_avx_extend_inst(bool sign) {
  return sign ? &MacroAssembler::vpmovsxbw
              : &MacroAssembler::vpmovzxbw;
}

AddressLiteral get_mask(int opcode) {
  switch (opcode) {
    case Op_AbsVF:
      return ExternalAddress(vector_float_signmask());
    case Op_AbsVD:
      return ExternalAddress(vector_double_signmask());
    case Op_NegVF:
      return ExternalAddress(vector_float_signflip());
    case Op_NegVD:
    default:
      return ExternalAddress(vector_double_signflip());
  }
}
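
// Usage sketch (explanatory): the lookup tables above let a single emitter
// body serve every opcode/ISA-level pairing.  A pointer-to-member call through
// these typedefs looks like:
//
//   XX_Inst shiftinst = get_xx_inst(Op_LShiftVS);  // &MacroAssembler::psllw
//   (_masm.*shiftinst)(dst, shift);                // emits: psllw dst, shift
//
// i.e. the ".*" operator binds the member-function pointer to the assembler
// instance before the call.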
false : true); 1518 XX_Inst shiftinst = get_xx_inst(opcode); 1519 1520 (_masm.*extendinst)(tmp, src); 1521 (_masm.*shiftinst)(tmp, shift); 1522 __ movdqu(dst, ExternalAddress(vector_short_to_byte_mask()), scratch); 1523 __ pand(dst, tmp); 1524 __ packuswb(dst, dst); 1525 } 1526 1527 // need a scratch register to load mask TBD 1528 void emit_vshift16B_code(MacroAssembler& _masm, int opcode, XMMRegister dst, 1529 XMMRegister src, XMMRegister shift, 1530 XMMRegister tmp1, XMMRegister tmp2, Register scratch) { 1531 XX_Inst extendinst = get_extend_inst(opcode == Op_URShiftVB ? false : true); 1532 XX_Inst shiftinst = get_xx_inst(opcode); 1533 1534 (_masm.*extendinst)(tmp1, src); 1535 (_masm.*shiftinst)(tmp1, shift); 1536 __ pshufd(tmp2, src, 0xE); 1537 (_masm.*extendinst)(tmp2, tmp2); 1538 (_masm.*shiftinst)(tmp2, shift); 1539 __ movdqu(dst, ExternalAddress(vector_short_to_byte_mask()), scratch); 1540 __ pand(tmp2, dst); 1541 __ pand(dst, tmp1); 1542 __ packuswb(dst, tmp2); 1543 } 1544 1545 1546 void emit_vshift16B_avx_code(MacroAssembler& _masm, int opcode, XMMRegister dst, 1547 XMMRegister src, XMMRegister shift, 1548 XMMRegister tmp, Register scratch) { 1549 XXI_Inst extendinst = get_avx_extend_inst(opcode == Op_URShiftVB ? false : true); 1550 XXXI_Inst shiftinst = get_xxxi_inst(opcode); 1551 1552 int vector_len = 1; 1553 (_masm.*extendinst)(tmp, src, vector_len); 1554 (_masm.*shiftinst)(tmp, tmp, shift, vector_len); 1555 __ vpand(tmp, tmp, ExternalAddress(vector_short_to_byte_mask()), vector_len, scratch); 1556 __ vextracti128_high(dst, tmp); 1557 __ vpackuswb(dst, tmp, dst, 0); 1558 } 1559 1560 void emit_vshift32B_avx_code(MacroAssembler& _masm, int opcode, XMMRegister dst, 1561 XMMRegister src, XMMRegister shift, 1562 XMMRegister tmp, Register scratch) { 1563 XXI_Inst extendinst = get_avx_extend_inst(opcode == Op_URShiftVB ? false : true); 1564 XXXI_Inst shiftinst = get_xxxi_inst(opcode); 1565 1566 int vector_len = 1; 1567 __ vextracti128_high(tmp, src); 1568 (_masm.*extendinst)(tmp, tmp, vector_len); 1569 (_masm.*extendinst)(dst, src, vector_len); 1570 (_masm.*shiftinst)(tmp, tmp, shift, vector_len); 1571 (_masm.*shiftinst)(dst, dst, shift, vector_len); 1572 __ vpand(tmp, tmp, ExternalAddress(vector_short_to_byte_mask()), vector_len, scratch); 1573 __ vpand(dst, dst, ExternalAddress(vector_short_to_byte_mask()), vector_len, scratch); 1574 __ vpackuswb(dst, dst, tmp, vector_len); 1575 __ vpermq(dst, dst, 0xD8, vector_len); 1576 } 1577 1578 void emit_vshift64B_avx_code(MacroAssembler& _masm, int opcode, XMMRegister dst, 1579 XMMRegister src, XMMRegister shift, 1580 XMMRegister tmp1, XMMRegister tmp2, Register scratch) { 1581 XXI_Inst extendinst = get_avx_extend_inst(opcode == Op_URShiftVB ? 
false : true);
1582 XXXI_Inst shiftinst = get_xxxi_inst(opcode);
1583
1584 int vector_len = 2;
1585 __ vextracti64x4(tmp1, src, 1);
1586 (_masm.*extendinst)(tmp1, tmp1, vector_len);
1587 (_masm.*extendinst)(tmp2, src, vector_len);
1588 (_masm.*shiftinst)(tmp1, tmp1, shift, vector_len);
1589 (_masm.*shiftinst)(tmp2, tmp2, shift, vector_len);
1590 __ vmovdqu(dst, ExternalAddress(vector_short_to_byte_mask()), scratch);
1591 __ vpbroadcastd(dst, dst, vector_len);
1592 __ vpand(tmp1, tmp1, dst, vector_len);
1593 __ vpand(tmp2, tmp2, dst, vector_len);
1594 __ vpackuswb(dst, tmp1, tmp2, vector_len);
1595 __ evmovdquq(tmp2, ExternalAddress(vector_byte_perm_mask()), vector_len, scratch);
1596 __ vpermq(dst, tmp2, dst, vector_len);
1597 }
1598
1599 //=============================================================================
1600 const bool Matcher::match_rule_supported(int opcode) {
1601 if (!has_match_rule(opcode))
1602 return false;
1603
1604 bool ret_value = true;
1605 switch (opcode) {
1606 case Op_AbsVL:
1607 if (UseAVX < 3)
1608 ret_value = false; break;
1609 case Op_PopCountI:
1610 case Op_PopCountL:
1611 if (!UsePopCountInstruction)
1612 ret_value = false;
1613 break;
1614 case Op_PopCountVI:
1615 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
1616 ret_value = false;
1617 break;
1618 case Op_MulVI:
1619 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1620 ret_value = false;
1621 break;
1622 case Op_MulVL:
1623 case Op_MulReductionVL:
1624 if (VM_Version::supports_avx512dq() == false)
1625 ret_value = false;
1626 break;
1627 case Op_AddReductionVL:
1628 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
1629 ret_value = false;
1630 break;
1631 case Op_AbsVB:
1632 case Op_AbsVS:
1633 case Op_AbsVI:
1634 case Op_AddReductionVI:
1635 if (UseSSE < 3) // requires at least SSE3
1636 ret_value = false;
1637 break;
1638 case Op_MulReductionVI:
1639 if (UseSSE < 4) // requires at least SSE4
1640 ret_value = false;
1641 break;
1642 case Op_AddReductionVF:
1643 case Op_AddReductionVD:
1644 case Op_MulReductionVF:
1645 case Op_MulReductionVD:
1646 if (UseSSE < 1) // requires at least SSE
1647 ret_value = false;
1648 break;
1649 case Op_SqrtVD:
1650 case Op_SqrtVF:
1651 if (UseAVX < 1) // enabled for AVX only
1652 ret_value = false;
1653 break;
1659 ret_value = false;
1660 break;
1661 case Op_CMoveVF:
1662 case Op_CMoveVD:
1663 if (UseAVX < 1 || UseAVX > 2)
1664 ret_value = false;
1665 break;
1666 case Op_StrIndexOf:
1667 if (!UseSSE42Intrinsics)
1668 ret_value = false;
1669 break;
1670 case Op_StrIndexOfChar:
1671 if (!UseSSE42Intrinsics)
1672 ret_value = false;
1673 break;
1674 case Op_OnSpinWait:
1675 if (VM_Version::supports_on_spin_wait() == false)
1676 ret_value = false;
1677 break;
1678 case Op_MulAddVS2VI:
1679 case Op_RShiftVL:
1680 case Op_AbsVD:
1681 case Op_NegVD:
1682 if (UseSSE < 2)
1683 ret_value = false;
1684 break;
1685 case Op_MulVB:
1686 case Op_LShiftVB:
1687 case Op_RShiftVB:
1688 case Op_URShiftVB:
1689 if (UseSSE < 4)
1690 ret_value = false;
1691 break;
1692 #ifdef _LP64
1693 case Op_MaxD:
1694 case Op_MaxF:
1695 case Op_MinD:
1696 case Op_MinF:
1697 if (UseAVX < 1) // enabled for AVX only
1698 ret_value = false;
1699 break;
1700 #endif
1701 }
1702
1703 return ret_value; // By default, match rules are supported.
1704 }
1705
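// The hook above and match_rule_supported_vector() below are meant to be
// consulted together: the first answers whether the opcode is available on
// this CPU at all, the second additionally screens the concrete vector
// length. A hypothetical caller (illustrative only, not part of this file)
// checking whether a 64-lane byte multiply can be generated:
//
//   bool can_emit_mulvb_64() {
//     return Matcher::match_rule_supported(Op_MulVB) &&          // UseSSE >= 4
//            Matcher::match_rule_supported_vector(Op_MulVB, 64); // needs AVX512BW
//   }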
1706 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
1707 // Identify extra cases that we might want to provide match rules for,
1708 // e.g. Op_* vector nodes and other intrinsics, while guarding with vlen.
1709 bool ret_value = match_rule_supported(opcode);
1710 if (ret_value) {
1711 switch (opcode) {
1712 case Op_AbsVB:
1713 case Op_AddVB:
1714 case Op_SubVB:
1715 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
1716 ret_value = false;
1717 break;
1718 case Op_AbsVS:
1719 case Op_AddVS:
1720 case Op_SubVS:
1721 case Op_MulVS:
1722 case Op_LShiftVS:
1723 case Op_RShiftVS:
1724 case Op_URShiftVS:
1725 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
1726 ret_value = false;
1727 break;
1728 case Op_MulVB:
1729 case Op_LShiftVB:
1730 case Op_RShiftVB:
1731 case Op_URShiftVB:
1732 if ((vlen == 32 && UseAVX < 2) ||
1733 ((vlen == 64) && (VM_Version::supports_avx512bw() == false)))
1734 ret_value = false;
1735 break;
1736 case Op_NegVF:
1737 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false))
1738 ret_value = false;
1739 break;
1740 case Op_CMoveVF:
1741 if (vlen != 8)
1742 ret_value = false;
1743 break;
1744 case Op_NegVD:
1745 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false))
1746 ret_value = false;
1747 break;
1748 case Op_CMoveVD:
1749 if (vlen != 4)
1750 ret_value = false;
1751 break;
1752 }
1753 }
1754
1755 return ret_value; // By default, match rules are supported.
1756 }
1757
1758 const bool Matcher::has_predicated_vectors(void) {
1759 bool ret_value = false;
1760 if (UseAVX > 2) {
1761 ret_value = VM_Version::supports_avx512vl();
1762 }
1763
1764 return ret_value;
1765 }
1766
1767 const int Matcher::float_pressure(int default_pressure_threshold) {
7542 ins_encode %{
7543 int vector_len = 2;
7544 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7545 %}
7546 ins_pipe( pipe_slow );
7547 %}
7548
7549 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
7550 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7551 match(Set dst (SubVD src (LoadVector mem)));
7552 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
7553 ins_encode %{
7554 int vector_len = 2;
7555 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7556 %}
7557 ins_pipe( pipe_slow );
7558 %}
7559
7560 // --------------------------------- MUL --------------------------------------
7561
7562 // Byte vector mul
7563 instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp, rRegI scratch) %{
7564 predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
7565 match(Set dst (MulVB src1 src2));
7566 effect(TEMP dst, TEMP tmp, TEMP scratch);
7567 format %{"pmovsxbw $tmp,$src1\n\t"
7568 "pmovsxbw $dst,$src2\n\t"
7569 "pmullw $tmp,$dst\n\t"
7570 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
7571 "pand $dst,$tmp\n\t"
7572 "packuswb $dst,$dst\t!
mul packed4B" %} 7573 ins_encode %{ 7574 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 7575 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 7576 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 7577 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7578 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 7579 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 7580 %} 7581 ins_pipe( pipe_slow ); 7582 %} 7583 7584 instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp, rRegI scratch) %{ 7585 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); 7586 match(Set dst (MulVB src1 src2)); 7587 effect(TEMP dst, TEMP tmp, TEMP scratch); 7588 format %{"pmovsxbw $tmp,$src1\n\t" 7589 "pmovsxbw $dst,$src2\n\t" 7590 "pmullw $tmp,$dst\n\t" 7591 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7592 "pand $dst,$tmp\n\t" 7593 "packuswb $dst,$dst\t! mul packed8B" %} 7594 ins_encode %{ 7595 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 7596 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 7597 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 7598 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7599 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 7600 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 7601 %} 7602 ins_pipe( pipe_slow ); 7603 %} 7604 7605 instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2, rRegI scratch) %{ 7606 predicate(UseSSE > 3 && n->as_Vector()->length() == 16); 7607 match(Set dst (MulVB src1 src2)); 7608 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7609 format %{"pmovsxbw $tmp1,$src1\n\t" 7610 "pmovsxbw $tmp2,$src2\n\t" 7611 "pmullw $tmp1,$tmp2\n\t" 7612 "pshufd $tmp2,$src1,0xEE\n\t" 7613 "pshufd $dst,$src2,0xEE\n\t" 7614 "pmovsxbw $tmp2,$tmp2\n\t" 7615 "pmovsxbw $dst,$dst\n\t" 7616 "pmullw $tmp2,$dst\n\t" 7617 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7618 "pand $tmp2,$dst\n\t" 7619 "pand $dst,$tmp1\n\t" 7620 "packuswb $dst,$tmp2\t! mul packed16B" %} 7621 ins_encode %{ 7622 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 7623 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 7624 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 7625 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 7626 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 7627 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 7628 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 7629 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 7630 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7631 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 7632 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 7633 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 7634 %} 7635 ins_pipe( pipe_slow ); 7636 %} 7637 7638 instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp, rRegI scratch) %{ 7639 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7640 match(Set dst (MulVB src1 src2)); 7641 effect(TEMP dst, TEMP tmp, TEMP scratch); 7642 format %{"vpmovsxbw $tmp,$src1\n\t" 7643 "vpmovsxbw $dst,$src2\n\t" 7644 "vpmullw $tmp,$tmp,$dst\n\t" 7645 "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7646 "vpand $dst,$dst,$tmp\n\t" 7647 "vextracti128_high $tmp,$dst\n\t" 7648 "vpackuswb $dst,$dst,$dst\n\t! 
mul packed16B" %} 7649 ins_encode %{ 7650 int vector_len = 1; 7651 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 7652 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7653 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); 7654 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7655 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 7656 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 7657 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 7658 %} 7659 ins_pipe( pipe_slow ); 7660 %} 7661 7662 instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, rRegI scratch) %{ 7663 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 7664 match(Set dst (MulVB src1 src2)); 7665 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7666 format %{"vextracti128_high $tmp1,$src1\n\t" 7667 "vextracti128_high $dst,$src2\n\t" 7668 "vpmovsxbw $tmp1,$tmp1\n\t" 7669 "vpmovsxbw $dst,$dst\n\t" 7670 "vpmullw $tmp1,$tmp1,$dst\n\t" 7671 "vpmovsxbw $tmp2,$src1\n\t" 7672 "vpmovsxbw $dst,$src2\n\t" 7673 "vpmullw $tmp2,$tmp2,$dst\n\t" 7674 "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" 7675 "vpbroadcastd $dst, $dst\n\t" 7676 "vpand $tmp1,$tmp1,$dst\n\t" 7677 "vpand $dst,$dst,$tmp2\n\t" 7678 "vpackuswb $dst,$dst,$tmp1\n\t" 7679 "vpermq $dst, $dst, 0xD8\t! mul packed32B" %} 7680 ins_encode %{ 7681 int vector_len = 1; 7682 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 7683 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 7684 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7685 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7686 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7687 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 7688 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7689 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7690 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7691 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7692 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7693 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7694 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7695 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 7696 %} 7697 ins_pipe( pipe_slow ); 7698 %} 7699 7700 instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ 7701 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 7702 match(Set dst (MulVB src1 src2)); 7703 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7704 format %{"vextracti64x4_high $tmp1,$src1\n\t" 7705 "vextracti64x4_high $dst,$src2\n\t" 7706 "vpmovsxbw $tmp1,$tmp1\n\t" 7707 "vpmovsxbw $dst,$dst\n\t" 7708 "vpmullw $tmp1,$tmp1,$dst\n\t" 7709 "vpmovsxbw $tmp2,$src1\n\t" 7710 "vpmovsxbw $dst,$src2\n\t" 7711 "vpmullw $tmp2,$tmp2,$dst\n\t" 7712 "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" 7713 "vpbroadcastd $dst, $dst\n\t" 7714 "vpand $tmp1,$tmp1,$dst\n\t" 7715 "vpand $tmp2,$tmp2,$dst\n\t" 7716 "vpackuswb $dst,$tmp1,$tmp2\n\t" 7717 "evmovdquq $tmp2,[0x0604020007050301]\n\t" 7718 "vpermq $dst,$tmp2,$dst,0x01\t! 
mul packed64B" %} 7719 7720 ins_encode %{ 7721 int vector_len = 2; 7722 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 7723 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 7724 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7725 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7726 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7727 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 7728 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7729 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7730 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7731 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7732 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7733 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7734 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7735 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 7736 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7737 7738 %} 7739 ins_pipe( pipe_slow ); 7740 %} 7741 7742 // Shorts/Chars vector mul 7743 instruct vmul2S(vecS dst, vecS src) %{ 7744 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7745 match(Set dst (MulVS dst src)); 7746 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7747 ins_encode %{ 7748 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7749 %} 7750 ins_pipe( pipe_slow ); 7751 %} 7752 7753 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 7754 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7755 match(Set dst (MulVS src1 src2)); 7756 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7757 ins_encode %{ 7758 int vector_len = 0; 7759 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7760 %} 7761 ins_pipe( pipe_slow ); 8444 match(Set dst (DivVD src1 src2)); 8445 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8446 ins_encode %{ 8447 int vector_len = 2; 8448 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8449 %} 8450 ins_pipe( pipe_slow ); 8451 %} 8452 8453 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8454 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8455 match(Set dst (DivVD src (LoadVector mem))); 8456 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8457 ins_encode %{ 8458 int vector_len = 2; 8459 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8460 %} 8461 ins_pipe( pipe_slow ); 8462 %} 8463 8464 // --------------------------------- Sqrt -------------------------------------- 8465 8466 // Floating point vector sqrt 8467 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8468 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8469 match(Set dst (SqrtVD src)); 8470 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 8471 ins_encode %{ 8472 int vector_len = 0; 8473 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8474 %} 8475 ins_pipe( pipe_slow ); 8476 %} 8477 8478 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8479 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8480 match(Set dst (SqrtVD (LoadVector mem))); 8481 format %{ "vsqrtpd $dst,$mem\t! 
sqrt packed2D" %} 8482 ins_encode %{ 8483 int vector_len = 0; 8601 match(Set dst (SqrtVF src)); 8602 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8603 ins_encode %{ 8604 int vector_len = 2; 8605 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8606 %} 8607 ins_pipe( pipe_slow ); 8608 %} 8609 8610 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8611 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8612 match(Set dst (SqrtVF (LoadVector mem))); 8613 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8614 ins_encode %{ 8615 int vector_len = 2; 8616 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8617 %} 8618 ins_pipe( pipe_slow ); 8619 %} 8620 8621 // ------------------------------ Shift --------------------------------------- 8622 8623 // Left and right shift count vectors are the same on x86 8624 // (only lowest bits of xmm reg are used for count). 8625 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8626 match(Set dst (LShiftCntV cnt)); 8627 match(Set dst (RShiftCntV cnt)); 8628 format %{ "movdl $dst,$cnt\t! load shift count" %} 8629 ins_encode %{ 8630 __ movdl($dst$$XMMRegister, $cnt$$Register); 8631 %} 8632 ins_pipe( pipe_slow ); 8633 %} 8634 8635 instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{ 8636 match(Set dst cnt); 8637 effect(TEMP tmp); 8638 format %{ "movl $tmp,$cnt\t" 8639 "movdl $dst,$tmp\t! load shift count" %} 8640 ins_encode %{ 8641 __ movl($tmp$$Register, $cnt$$constant); 8642 __ movdl($dst$$XMMRegister, $tmp$$Register); 8643 %} 8644 ins_pipe( pipe_slow ); 8645 %} 8646 8647 // Byte vector shift 8648 instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{ 8649 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 8650 match(Set dst (LShiftVB src shift)); 8651 match(Set dst (RShiftVB src shift)); 8652 match(Set dst (URShiftVB src shift)); 8653 effect(TEMP dst, TEMP tmp, TEMP scratch); 8654 format %{"pmovxbw $tmp,$src\n\t" 8655 "shiftop $tmp,$shift\n\t" 8656 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8657 "pand $dst,$tmp\n\t" 8658 "packuswb $dst,$dst\n\t ! packed4B shift" %} 8659 ins_encode %{ 8660 emit_vshift4Bor8B_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp$$XMMRegister, $scratch$$Register); 8661 %} 8662 ins_pipe( pipe_slow ); 8663 %} 8664 8665 instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{ 8666 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); 8667 match(Set dst (LShiftVB src shift)); 8668 match(Set dst (RShiftVB src shift)); 8669 match(Set dst (URShiftVB src shift)); 8670 effect(TEMP dst, TEMP tmp, TEMP scratch); 8671 format %{"pmovxbw $tmp,$src\n\t" 8672 "shiftop $tmp,$shift\n\t" 8673 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8674 "pand $dst,$tmp\n\t" 8675 "packuswb $dst,$dst\n\t ! 
packed8B shift" %} 8676 ins_encode %{ 8677 emit_vshift4Bor8B_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp$$XMMRegister, $scratch$$Register); 8678 %} 8679 ins_pipe( pipe_slow ); 8680 %} 8681 8682 instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{ 8683 predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16); 8684 match(Set dst (LShiftVB src shift)); 8685 match(Set dst (RShiftVB src shift)); 8686 match(Set dst (URShiftVB src shift)); 8687 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 8688 format %{"pmovxbw $tmp1,$src\n\t" 8689 "shiftop $tmp1,$shift\n\t" 8690 "pshufd $tmp2,$src\n\t" 8691 "pmovxbw $tmp2,$tmp2\n\t" 8692 "shiftop $tmp2,$shift\n\t" 8693 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8694 "pand $tmp2,$dst\n\t" 8695 "pand $dst,$tmp1\n\t" 8696 "packuswb $dst,$tmp2\n\t! packed16B shift" %} 8697 ins_encode %{ 8698 emit_vshift16B_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $scratch$$Register); 8699 %} 8700 ins_pipe( pipe_slow ); 8701 %} 8702 8703 instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{ 8704 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8705 match(Set dst (LShiftVB src shift)); 8706 match(Set dst (RShiftVB src shift)); 8707 match(Set dst (URShiftVB src shift)); 8708 effect(TEMP dst, TEMP tmp, TEMP scratch); 8709 format %{"vpmovxbw $tmp,$src\n\t" 8710 "shiftop $tmp,$tmp,$shift\n\t" 8711 "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" 8712 "vextracti128_high $dst,$tmp\n\t" 8713 "vpackuswb $dst,$tmp,$dst\n\t! packed16B shift" %} 8714 ins_encode %{ 8715 emit_vshift16B_avx_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp$$XMMRegister, $scratch$$Register); 8716 %} 8717 ins_pipe( pipe_slow ); 8718 %} 8719 8720 instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{ 8721 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 8722 match(Set dst (LShiftVB src shift)); 8723 match(Set dst (RShiftVB src shift)); 8724 match(Set dst (URShiftVB src shift)); 8725 effect(TEMP dst, TEMP tmp, TEMP scratch); 8726 format %{"vextracti128_high $tmp,$src\n\t" 8727 "vpmovxbw $tmp,$tmp\n\t" 8728 "vpmovxbw $dst,$src\n\t" 8729 "shiftop $tmp,$tmp,$shift\n\t" 8730 "shiftop $dst,$dst,$shift\n\t" 8731 "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" 8732 "vpand $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t" 8733 "vpackuswb $dst,$dst,$tmp\n\t" 8734 "vpermq $dst,$dst,0xD8\n\t! 
packed32B shift" %}
8735 ins_encode %{
8736 emit_vshift32B_avx_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp$$XMMRegister, $scratch$$Register);
8737 %}
8738 ins_pipe( pipe_slow );
8739 %}
8740
8741 instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
8742 predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
8743 match(Set dst (LShiftVB src shift));
8744 match(Set dst (RShiftVB src shift));
8745 match(Set dst (URShiftVB src shift));
8746 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
8747 format %{"vextracti64x4 $tmp1,$src\n\t"
8748 "vpmovxbw $tmp1,$tmp1\n\t"
8749 "vpmovxbw $tmp2,$src\n\t"
8750 "shiftop $tmp1,$tmp1,$shift\n\t"
8751 "shiftop $tmp2,$tmp2,$shift\n\t"
8752 "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
8753 "vpbroadcastd $dst,$dst\n\t"
8754 "vpand $tmp1,$tmp1,$dst\n\t"
8755 "vpand $tmp2,$tmp2,$dst\n\t"
8756 "vpackuswb $dst,$tmp1,$tmp2\n\t"
8757 "evmovdquq $tmp2, [0x0604020007050301]\n\t"
8758 "vpermq $dst,$tmp2,$dst\n\t! packed64B shift" %}
8759 ins_encode %{
8760 emit_vshift64B_avx_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $scratch$$Register);
8761 %}
8762 ins_pipe( pipe_slow );
8763 %}
8764
8765 // Shorts vector logical right shift produces an incorrect Java result
8766 // for negative data, because Java code converts a short value to an int
8767 // with sign extension before the shift. Char vectors are fine, though,
8768 // since chars are unsigned values.
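// A scalar model of the issue (illustrative only): Java widens a short to
// int with sign extension before an unsigned shift, so a lanewise 16-bit
// logical shift gives a different answer for negative inputs:
//
//   int16_t s        = -2;                                   // 0xFFFE
//   int16_t java_res = (int16_t)((uint32_t)(int32_t)s >> 1); // 0xFFFF (-1)
//   int16_t lanewise = (int16_t)((uint16_t)s >> 1);          // 0x7FFF
//
// For char data both routes zero-extend the value, so the results agree
// and the rules below remain usable.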
8769 // Shorts/Chars vector shift
8770 instruct vshift2S(vecS dst, vecS src, vecS shift) %{
8771 predicate(n->as_Vector()->length() == 2);
8772 match(Set dst (LShiftVS src shift));
8773 match(Set dst (RShiftVS src shift));
8774 match(Set dst (URShiftVS src shift));
8775 format %{ "shiftop $dst,$src,$shift\t! shift packed2S" %}
8776 ins_encode %{
8777 if (UseAVX == 0) {
8778 XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode());
8779 if ($dst$$XMMRegister != $src$$XMMRegister)
8780 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8781 (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister);
8782 } else {
8783 int vector_len = 0;
8784 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
8785 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8786 }
8787 %}
8788 ins_pipe( pipe_slow );
8789 %}
8790
8791 instruct vshift4S(vecD dst, vecD src, vecS shift) %{
8792 predicate(n->as_Vector()->length() == 4);
8793 match(Set dst (LShiftVS src shift));
8794 match(Set dst (RShiftVS src shift));
8795 match(Set dst (URShiftVS src shift));
8796 format %{ "shiftop $dst,$src,$shift\t! shift packed4S" %}
8797 ins_encode %{
8798 if (UseAVX == 0) {
8799 XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode());
8800 if ($dst$$XMMRegister != $src$$XMMRegister)
8801 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8802 (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister);
8803
8804 } else {
8805 int vector_len = 0;
8806 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
8807 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8808 }
8809 %}
8810 ins_pipe( pipe_slow );
8811 %}
8812
8813 instruct vshift8S(vecX dst, vecX src, vecS shift) %{
8814 predicate(n->as_Vector()->length() == 8);
8815 match(Set dst (LShiftVS src shift));
8816 match(Set dst (RShiftVS src shift));
8817 match(Set dst (URShiftVS src shift));
8818 format %{ "shiftop $dst,$src,$shift\t! shift packed8S" %}
8819 ins_encode %{
8820 if (UseAVX == 0) {
8821 XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode());
8822 if ($dst$$XMMRegister != $src$$XMMRegister)
8823 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8824 (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister);
8825 } else {
8826 int vector_len = 0;
8827 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
8828 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8829 }
8830 %}
8831 ins_pipe( pipe_slow );
8832 %}
8833
8834 instruct vshift16S(vecY dst, vecY src, vecS shift) %{
8835 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8836 match(Set dst (LShiftVS src shift));
8837 match(Set dst (RShiftVS src shift));
8838 match(Set dst (URShiftVS src shift));
8839 format %{ "shiftop $dst,$src,$shift\t! shift packed16S" %}
8840 ins_encode %{
8841 int vector_len = 1;
8842 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
8843 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8844 %}
8845 ins_pipe( pipe_slow );
8846 %}
8847
8848 instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{
8849 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8850 match(Set dst (LShiftVS src shift));
8851 match(Set dst (RShiftVS src shift));
8852 match(Set dst (URShiftVS src shift));
8853 format %{ "shiftop $dst,$src,$shift\t! shift packed32S" %}
8854 ins_encode %{
8855 int vector_len = 2;
8856 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
8857 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8858 %}
8859 ins_pipe( pipe_slow );
8860 %}
8861
8862 // Integers vector shift
8863 instruct vshift2I(vecD dst, vecD src, vecS shift) %{
8864 predicate(n->as_Vector()->length() == 2);
8865 match(Set dst (LShiftVI src shift));
8866 match(Set dst (RShiftVI src shift));
8867 match(Set dst (URShiftVI src shift));
8868 format %{ "shiftop $dst,$src,$shift\t!
shift packed2I" %} 8869 ins_encode %{ 8870 if (UseAVX == 0) { 8871 XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode()); 8872 if ($dst$$XMMRegister != $src$$XMMRegister) 8873 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 8874 (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister); 8875 } else { 8876 int vector_len = 0; 8877 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); 8878 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8879 } 8880 %} 8881 ins_pipe( pipe_slow ); 8882 %} 8883 8884 instruct vshift4I(vecX dst, vecX src, vecS shift) %{ 8885 predicate(n->as_Vector()->length() == 4); 8886 match(Set dst (LShiftVI src shift)); 8887 match(Set dst (RShiftVI src shift)); 8888 match(Set dst (URShiftVI src shift)); 8889 format %{ "shiftop $dst,$src,$shift\t! shift packed4I" %} 8890 ins_encode %{ 8891 if (UseAVX == 0) { 8892 XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode()); 8893 if ($dst$$XMMRegister != $src$$XMMRegister) 8894 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 8895 (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister); 8896 } else { 8897 int vector_len = 0; 8898 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); 8899 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8900 } 8901 %} 8902 ins_pipe( pipe_slow ); 8903 %} 8904 8905 instruct vshift8I(vecY dst, vecY src, vecS shift) %{ 8906 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8907 match(Set dst (LShiftVI src shift)); 8908 match(Set dst (RShiftVI src shift)); 8909 match(Set dst (URShiftVI src shift)); 8910 format %{ "shiftop $dst,$src,$shift\t! shift packed8I" %} 8911 ins_encode %{ 8912 int vector_len = 1; 8913 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); 8914 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8915 %} 8916 ins_pipe( pipe_slow ); 8917 %} 8918 8919 instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{ 8920 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8921 match(Set dst (LShiftVI src shift)); 8922 match(Set dst (RShiftVI src shift)); 8923 match(Set dst (URShiftVI src shift)); 8924 format %{ "shiftop $dst,$src,$shift\t! shift packed16I" %} 8925 ins_encode %{ 8926 int vector_len = 2; 8927 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); 8928 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8929 %} 8930 ins_pipe( pipe_slow ); 8931 %} 8932 8933 // Longs vector shift 8934 instruct vshift2L(vecX dst, vecX src, vecS shift) %{ 8935 predicate(n->as_Vector()->length() == 2); 8936 match(Set dst (LShiftVL src shift)); 8937 match(Set dst (URShiftVL src shift)); 8938 format %{ "shiftop $dst,$src,$shift\t! 
shift packed2L" %}
8939 ins_encode %{
8940 if (UseAVX == 0) {
8941 XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode());
8942 if ($dst$$XMMRegister != $src$$XMMRegister)
8943 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8944 (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister);
8945 } else {
8946 int vector_len = 0;
8947 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
8948 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8949 }
8950 %}
8951 ins_pipe( pipe_slow );
8952 %}
8953
8954 instruct vshift4L(vecY dst, vecY src, vecS shift) %{
8955 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8956 match(Set dst (LShiftVL src shift));
8957 match(Set dst (URShiftVL src shift));
8958 format %{ "shiftop $dst,$src,$shift\t! shift packed4L" %}
8959 ins_encode %{
8960 int vector_len = 1;
8961 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
8962 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8963 %}
8964 ins_pipe( pipe_slow );
8965 %}
8966
8967 instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
8968 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8969 match(Set dst (LShiftVL src shift));
8970 match(Set dst (RShiftVL src shift));
8971 match(Set dst (URShiftVL src shift));
8972 format %{ "shiftop $dst,$src,$shift\t! shift packed8L" %}
8973 ins_encode %{
8974 int vector_len = 2;
8975 XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
8976 (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8977 %}
8978 ins_pipe( pipe_slow );
8979 %}
8980
8981 // -------------------ArithmeticRightShift -----------------------------------
8982 // Long vector arithmetic right shift
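// The SSE/AVX2 sequences below synthesize the missing 64-bit arithmetic
// right shift from the logical one (psrlq) with a xor/subtract identity.
// A scalar model (illustrative only):
//
//   uint64_t m = 0x8000000000000000ULL >> n;  // shifted sign-bit mask
//   uint64_t r = ((x >> n) ^ m) - m;          // == (int64_t)x >> n
//
// The xor clears the relocated sign bit and the subtract borrows through
// the upper n bits, reproducing the sign extension. The EVEX variants can
// instead use evpsraq directly.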
8983 instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
8984 predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
8985 match(Set dst (RShiftVL src shift));
8986 effect(TEMP dst, TEMP tmp, TEMP scratch);
8987 format %{ "movdqu $dst,$src\n\t"
8988 "psrlq $dst,$shift\n\t"
8989 "movdqu $tmp,[0x8000000000000000]\n\t"
8990 "psrlq $tmp,$shift\n\t"
8991 "pxor $dst,$tmp\n\t"
8992 "psubq $dst,$tmp\t! arithmetic right shift packed2L" %}
8993 ins_encode %{
8994 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8995 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
8996 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
8997 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
8998 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
8999 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
9000 %}
9001 ins_pipe( pipe_slow );
9002 %}
9003
9004 instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
9005 predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
9006 match(Set dst (RShiftVL src shift));
9007 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
9008 ins_encode %{
9009 int vector_len = 0;
9010 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9011 %}
9012 ins_pipe( pipe_slow );
9013 %}
9014
9015 instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
9016 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
9017 match(Set dst (RShiftVL src shift));
9018 effect(TEMP dst, TEMP tmp, TEMP scratch);
9019 format %{ "vpsrlq $dst,$src,$shift\n\t"
9020 "vmovdqu $tmp,[0x8000000000000000]\n\t"
9021 "vpsrlq $tmp,$tmp,$shift\n\t"
9022 "vpxor $dst,$dst,$tmp\n\t"
9023 "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
9024 ins_encode %{
9025 int vector_len = 1;
9026 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9027 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
9028 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
9029 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
9030 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
9031 %}
9032 ins_pipe( pipe_slow );
9033 %}
9034
9035 instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
9036 predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
9037 match(Set dst (RShiftVL src shift));
9038 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
9039 ins_encode %{
9040 int vector_len = 1;
9041 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9042 %}
9043 ins_pipe( pipe_slow );
9044 %}
9045
9046 // --------------------------------- AND --------------------------------------
9047
9048 instruct vand4B(vecS dst, vecS src) %{
9049 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
9050 match(Set dst (AndV dst src));
9051 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
9052 ins_encode %{
9053 __ pand($dst$$XMMRegister, $src$$XMMRegister);
9054 %}
9055 ins_pipe( pipe_slow );
9056 %}
9057
9058 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
9059 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
9060 match(Set dst (AndV src1 src2));
9061 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
9062 ins_encode %{
9063 int vector_len = 0;
9064 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9065 %}
9066 ins_pipe( pipe_slow );
9067 %}
9068
9069 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
9070 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
9071 match(Set dst (AndV src (LoadVector mem)));
9072 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
9073 ins_encode %{
9074 int vector_len = 0;
9075 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
9076 %}
9077 ins_pipe( pipe_slow );
9078 %}
9079
9080 instruct vand8B(vecD dst, vecD src) %{
9081 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
9082 match(Set dst (AndV dst src));
9083 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
9084 ins_encode %{
9448 %}
9449
9450 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
9451 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
9452 match(Set dst (XorV src1 src2));
9453 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
9454 ins_encode %{
9455 int vector_len = 2;
9456 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9457 %}
9458 ins_pipe( pipe_slow );
9459 %}
9460
9461 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
9462 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
9463 match(Set dst (XorV src (LoadVector mem)));
9464 format %{ "vpxor $dst,$src,$mem\t!
xor vectors (64 bytes)" %} 9465 ins_encode %{ 9466 int vector_len = 2; 9467 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9468 %} 9469 ins_pipe( pipe_slow ); 9470 %} 9471 9472 // --------------------------------- ABS -------------------------------------- 9473 // a = |a| 9474 instruct vabs4B_reg(vecS dst, vecS src) %{ 9475 predicate(UseSSE > 2 && n->as_Vector()->length() == 4); 9476 match(Set dst (AbsVB src)); 9477 format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed4B" %} 9478 ins_encode %{ 9479 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 9480 %} 9481 ins_pipe( pipe_slow ); 9482 %} 9483 9484 instruct vabs8B_reg(vecD dst, vecD src) %{ 9485 predicate(UseSSE > 2 && n->as_Vector()->length() == 8); 9486 match(Set dst (AbsVB src)); 9487 format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %} 9488 ins_encode %{ 9489 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 9490 %} 9491 ins_pipe( pipe_slow ); 9492 %} 9493 9494 instruct vabs16B_reg(vecX dst, vecX src) %{ 9495 predicate(UseSSE > 2 && n->as_Vector()->length() == 16); 9496 match(Set dst (AbsVB src)); 9497 format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %} 9498 ins_encode %{ 9499 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 9500 %} 9501 ins_pipe( pipe_slow ); 9502 %} 9503 9504 instruct vabs32B_reg(vecY dst, vecY src) %{ 9505 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 9506 match(Set dst (AbsVB src)); 9507 format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %} 9508 ins_encode %{ 9509 int vector_len = 1; 9510 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9511 %} 9512 ins_pipe( pipe_slow ); 9513 %} 9514 9515 instruct vabs64B_reg(vecZ dst, vecZ src) %{ 9516 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 9517 match(Set dst (AbsVB src)); 9518 format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed64B" %} 9519 ins_encode %{ 9520 int vector_len = 2; 9521 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9522 %} 9523 ins_pipe( pipe_slow ); 9524 %} 9525 9526 instruct vabs2S_reg(vecD dst, vecD src) %{ 9527 predicate(UseSSE > 2 && n->as_Vector()->length() == 2); 9528 match(Set dst (AbsVS src)); 9529 format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed2S" %} 9530 ins_encode %{ 9531 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 9532 %} 9533 ins_pipe( pipe_slow ); 9534 %} 9535 9536 instruct vabs4S_reg(vecD dst, vecD src) %{ 9537 predicate(UseSSE > 2 && n->as_Vector()->length() == 4); 9538 match(Set dst (AbsVS src)); 9539 format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %} 9540 ins_encode %{ 9541 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 9542 %} 9543 ins_pipe( pipe_slow ); 9544 %} 9545 9546 instruct vabs8S_reg(vecX dst, vecX src) %{ 9547 predicate(UseSSE > 2 && n->as_Vector()->length() == 8); 9548 match(Set dst (AbsVS src)); 9549 format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %} 9550 ins_encode %{ 9551 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 9552 %} 9553 ins_pipe( pipe_slow ); 9554 %} 9555 9556 instruct vabs16S_reg(vecY dst, vecY src) %{ 9557 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 9558 match(Set dst (AbsVS src)); 9559 format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %} 9560 ins_encode %{ 9561 int vector_len = 1; 9562 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9563 %} 9564 ins_pipe( pipe_slow ); 9565 %} 9566 9567 instruct vabs32S_reg(vecZ dst, vecZ src) %{ 9568 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 9569 match(Set dst (AbsVS src)); 9570 format %{ 
"vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %} 9571 ins_encode %{ 9572 int vector_len = 2; 9573 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9574 %} 9575 ins_pipe( pipe_slow ); 9576 %} 9577 9578 instruct vabs2I_reg(vecD dst, vecD src) %{ 9579 predicate(UseSSE > 2 && n->as_Vector()->length() == 2); 9580 match(Set dst (AbsVI src)); 9581 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %} 9582 ins_encode %{ 9583 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 9584 %} 9585 ins_pipe( pipe_slow ); 9586 %} 9587 9588 instruct vabs4I_reg(vecX dst, vecX src) %{ 9589 predicate(UseSSE > 2 && n->as_Vector()->length() == 4); 9590 match(Set dst (AbsVI src)); 9591 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %} 9592 ins_encode %{ 9593 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 9594 %} 9595 ins_pipe( pipe_slow ); 9596 %} 9597 9598 instruct vabs8I_reg(vecY dst, vecY src) %{ 9599 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9600 match(Set dst (AbsVI src)); 9601 format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %} 9602 ins_encode %{ 9603 int vector_len = 1; 9604 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9605 %} 9606 ins_pipe( pipe_slow ); 9607 %} 9608 9609 instruct vabs16I_reg(vecZ dst, vecZ src) %{ 9610 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9611 match(Set dst (AbsVI src)); 9612 format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %} 9613 ins_encode %{ 9614 int vector_len = 2; 9615 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9616 %} 9617 ins_pipe( pipe_slow ); 9618 %} 9619 9620 instruct vabs2L_reg(vecX dst, vecX src) %{ 9621 predicate(UseAVX > 2 && n->as_Vector()->length() == 2); 9622 match(Set dst (AbsVL src)); 9623 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %} 9624 ins_encode %{ 9625 int vector_len = 0; 9626 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9627 %} 9628 ins_pipe( pipe_slow ); 9629 %} 9630 9631 instruct vabs4L_reg(vecY dst, vecY src) %{ 9632 predicate(UseAVX > 2 && n->as_Vector()->length() == 4); 9633 match(Set dst (AbsVL src)); 9634 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %} 9635 ins_encode %{ 9636 int vector_len = 1; 9637 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9638 %} 9639 ins_pipe( pipe_slow ); 9640 %} 9641 9642 instruct vabs8L_reg(vecZ dst, vecZ src) %{ 9643 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9644 match(Set dst (AbsVL src)); 9645 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %} 9646 ins_encode %{ 9647 int vector_len = 2; 9648 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9649 %} 9650 ins_pipe( pipe_slow ); 9651 %} 9652 9653 // --------------------------------- ABSNEG -------------------------------------- 9654 9655 instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{ 9656 predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); 9657 match(Set dst (AbsVD src)); 9658 match(Set dst (NegVD src)); 9659 effect(TEMP scratch); 9660 format %{ "and(xor)pd $dst,$src,[mask]\t# absneg packed2D" %} 9661 ins_encode %{ 9662 int opcode = this->as_Mach()->ideal_Opcode(); 9663 XAR_Inst opinst = get_xar_inst(opcode); 9664 AddressLiteral adr = get_mask(opcode); 9665 if ($dst$$XMMRegister != $src$$XMMRegister) 9666 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 9667 (_masm.*opinst)($dst$$XMMRegister, adr, $scratch$$Register); 9668 %} 9669 ins_pipe( pipe_slow ); 9670 %} 9671 9672 instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{ 9673 
predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9674 match(Set dst (AbsVD src)); 9675 match(Set dst (NegVD src)); 9676 effect(TEMP scratch); 9677 format %{ "vand(xor)pd $dst,$src,[mask]\t# absneg packed4D" %} 9678 ins_encode %{ 9679 int opcode = this->as_Mach()->ideal_Opcode(); 9680 XXAIR_Inst opinst = get_xxair_inst(opcode); 9681 AddressLiteral adr = get_mask(opcode); 9682 int vector_len = 1; 9683 (_masm.*opinst)($dst$$XMMRegister, $src$$XMMRegister, adr, vector_len, $scratch$$Register); 9684 %} 9685 ins_pipe( pipe_slow ); 9686 %} 9687 9688 instruct vabsneg8D(vecZ dst, vecZ src, rRegI scratch) %{ 9689 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9690 match(Set dst (AbsVD src)); 9691 match(Set dst (NegVD src)); 9692 effect(TEMP scratch); 9693 format %{ "vand(xor)pd $dst,$src,[mask]\t# absneg packed8D" %} 9694 ins_encode %{ 9695 int opcode = this->as_Mach()->ideal_Opcode(); 9696 XXAIR_Inst opinst = get_xxair_inst(opcode); 9697 AddressLiteral adr = get_mask(opcode); 9698 int vector_len = 2; 9699 (_masm.*opinst)($dst$$XMMRegister, $src$$XMMRegister, adr, vector_len, $scratch$$Register); 9700 %} 9701 ins_pipe( pipe_slow ); 9702 %} 9703 9704 instruct vabsneg2F(vecD dst, vecD src, rRegI scratch) %{ 9705 predicate(UseSSE > 0 && n->as_Vector()->length() == 2); 9706 match(Set dst (AbsVF src)); 9707 match(Set dst (NegVF src)); 9708 effect(TEMP scratch); 9709 format %{ "and(xor)ps $dst,$src,[mask]\t# absneg packed2F" %} 9710 ins_encode %{ 9711 int opcode = this->as_Mach()->ideal_Opcode(); 9712 XAR_Inst opinst = get_xar_inst(opcode); 9713 AddressLiteral adr = get_mask(opcode); 9714 if ($dst$$XMMRegister != $src$$XMMRegister) 9715 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 9716 (_masm.*opinst)($dst$$XMMRegister, adr, $scratch$$Register); 9717 %} 9718 ins_pipe( pipe_slow ); 9719 %} 9720 9721 instruct vabsneg4F(vecX dst, rRegI scratch) %{ 9722 predicate(UseSSE > 0 && n->as_Vector()->length() == 4); 9723 match(Set dst (AbsVF dst)); 9724 match(Set dst (NegVF dst)); 9725 effect(TEMP scratch); 9726 format %{ "vand(xor)ps $dst,[mask]\t# absneg packed4F" %} 9727 ins_cost(150); 9728 ins_encode %{ 9729 int opcode = this->as_Mach()->ideal_Opcode(); 9730 XAR_Inst opinst = get_xar_inst(opcode); 9731 AddressLiteral adr = get_mask(opcode); 9732 (_masm.*opinst)($dst$$XMMRegister, adr, $scratch$$Register); 9733 %} 9734 ins_pipe( pipe_slow ); 9735 %} 9736 9737 instruct vabsneg8F(vecY dst, vecY src, rRegI scratch) %{ 9738 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9739 match(Set dst (AbsVF src)); 9740 match(Set dst (NegVF src)); 9741 effect(TEMP scratch); 9742 format %{ "vand(xor)ps $dst,$src,[mask]\t# absneg packed8F" %} 9743 ins_cost(150); 9744 ins_encode %{ 9745 int opcode = this->as_Mach()->ideal_Opcode(); 9746 XXAIR_Inst opinst = get_xxair_inst(opcode); 9747 AddressLiteral adr = get_mask(opcode); 9748 int vector_len = 1; 9749 (_masm.*opinst)($dst$$XMMRegister, $src$$XMMRegister, adr, vector_len, $scratch$$Register); 9750 %} 9751 ins_pipe( pipe_slow ); 9752 %} 9753 9754 instruct vabsneg16F(vecZ dst, vecZ src, rRegI scratch) %{ 9755 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9756 match(Set dst (AbsVF src)); 9757 match(Set dst (NegVF src)); 9758 effect(TEMP scratch); 9759 format %{ "vand(xor)ps $dst,$src,[mask]\t# absneg packed16F" %} 9760 ins_cost(150); 9761 ins_encode %{ 9762 int opcode = this->as_Mach()->ideal_Opcode(); 9763 XXAIR_Inst opinst = get_xxair_inst(opcode); 9764 AddressLiteral adr = get_mask(opcode); 9765 int vector_len = 2; 9766 
(_masm.*opinst)($dst$$XMMRegister, $src$$XMMRegister, adr, vector_len, $scratch$$Register);
9767 %}
9768 ins_pipe( pipe_slow );
9769 %}
9770
9771 // --------------------------------- FMA --------------------------------------
9772
9773 // a * b + c
9774 instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
9775 predicate(UseFMA && n->as_Vector()->length() == 2);
9776 match(Set c (FmaVD c (Binary a b)));
9777 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
9778 ins_cost(150);
9779 ins_encode %{
9780 int vector_len = 0;
9781 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
9782 %}
9783 ins_pipe( pipe_slow );
9784 %}
9785
9786 // a * b + c
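// The FMA rules bind $c as both an input and the result
// (Set c (FmaVD c (Binary a b))), matching the x86 fused forms in which
// the addend register receives a*b+c computed with a single rounding.
// A scalar model of one FmaVD lane (illustrative only):
//
//   #include <cmath>
//   double fma_lane(double a, double b, double c) {
//     return std::fma(a, b, c);  // one rounding, unlike (a * b) + c
//   }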