src/cpu/x86/vm/x86.ad
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 6340864 Sdiff src/cpu/x86/vm

src/cpu/x86/vm/x86.ad

Print this page




 483   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 484 #else
 485   static address float_signmask()  { return (address)float_signmask_pool; }
 486   static address float_signflip()  { return (address)float_signflip_pool; }
 487   static address double_signmask() { return (address)double_signmask_pool; }
 488   static address double_signflip() { return (address)double_signflip_pool; }
 489 #endif
 490 
 491 // Map Types to machine register types
 492 const int Matcher::base2reg[Type::lastype] = {
 493   Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
 494   Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
 495   Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
 496   Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
 497   0, 0/*abio*/,
 498   Op_RegP /* Return address */, 0, /* the memories */
 499   Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
 500   0  /*bottom*/
 501 };
 502 


















 503 // Max vector size in bytes. 0 if not supported.
 504 const int Matcher::vector_width_in_bytes(BasicType bt) {
 505   assert(is_java_primitive(bt), "only primitive type vectors");
 506   if (UseSSE < 2) return 0;
 507   // SSE2 supports 128bit vectors for all types.
 508   // AVX2 supports 256bit vectors for all types.
 509   int size = (UseAVX > 1) ? 32 : 16;
 510   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 511   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 512     size = 32;
 513   // Use flag to limit vector size.
 514   size = MIN2(size,(int)MaxVectorSize);
 515   // Minimum 2 values in vector (or 4 for bytes).
 516   switch (bt) {
 517   case T_DOUBLE:
 518   case T_LONG:
 519     if (size < 16) return 0;
 520   case T_FLOAT:
 521   case T_INT:
 522     if (size < 8) return 0;


1422   ins_pipe(pipe_slow);
1423 %}
1424 
1425 instruct absF_reg(regF dst) %{
1426   predicate((UseSSE>=1) && (UseAVX == 0));
1427   match(Set dst (AbsF dst));
1428   ins_cost(150);
1429   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
1430   ins_encode %{
1431     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1432   %}
1433   ins_pipe(pipe_slow);
1434 %}
1435 
1436 instruct absF_reg_reg(regF dst, regF src) %{
1437   predicate(UseAVX > 0);
1438   match(Set dst (AbsF src));
1439   ins_cost(150);
1440   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1441   ins_encode %{

1442     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1443               ExternalAddress(float_signmask()));
1444   %}
1445   ins_pipe(pipe_slow);
1446 %}
1447 
1448 instruct absD_reg(regD dst) %{
1449   predicate((UseSSE>=2) && (UseAVX == 0));
1450   match(Set dst (AbsD dst));
1451   ins_cost(150);
1452   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
1453             "# abs double by sign masking" %}
1454   ins_encode %{
1455     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1456   %}
1457   ins_pipe(pipe_slow);
1458 %}
1459 
1460 instruct absD_reg_reg(regD dst, regD src) %{
1461   predicate(UseAVX > 0);
1462   match(Set dst (AbsD src));
1463   ins_cost(150);
1464   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
1465             "# abs double by sign masking" %}
1466   ins_encode %{

1467     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1468               ExternalAddress(double_signmask()));
1469   %}
1470   ins_pipe(pipe_slow);
1471 %}
1472 
1473 instruct negF_reg(regF dst) %{
1474   predicate((UseSSE>=1) && (UseAVX == 0));
1475   match(Set dst (NegF dst));
1476   ins_cost(150);
1477   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
1478   ins_encode %{
1479     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
1480   %}
1481   ins_pipe(pipe_slow);
1482 %}
1483 
1484 instruct negF_reg_reg(regF dst, regF src) %{
1485   predicate(UseAVX > 0);
1486   match(Set dst (NegF src));
1487   ins_cost(150);
1488   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
1489   ins_encode %{

1490     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
1491               ExternalAddress(float_signflip()));
1492   %}
1493   ins_pipe(pipe_slow);
1494 %}
1495 
1496 instruct negD_reg(regD dst) %{
1497   predicate((UseSSE>=2) && (UseAVX == 0));
1498   match(Set dst (NegD dst));
1499   ins_cost(150);
1500   format %{ "xorpd   $dst, [0x8000000000000000]\t"
1501             "# neg double by sign flipping" %}
1502   ins_encode %{
1503     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
1504   %}
1505   ins_pipe(pipe_slow);
1506 %}
1507 
1508 instruct negD_reg_reg(regD dst, regD src) %{
1509   predicate(UseAVX > 0);
1510   match(Set dst (NegD src));
1511   ins_cost(150);
1512   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
1513             "# neg double by sign flipping" %}
1514   ins_encode %{

1515     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
1516               ExternalAddress(double_signflip()));
1517   %}
1518   ins_pipe(pipe_slow);
1519 %}
1520 
1521 instruct sqrtF_reg(regF dst, regF src) %{
1522   predicate(UseSSE>=1);
1523   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
1524 
1525   format %{ "sqrtss  $dst, $src" %}
1526   ins_cost(150);
1527   ins_encode %{
1528     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
1529   %}
1530   ins_pipe(pipe_slow);
1531 %}
1532 
1533 instruct sqrtF_mem(regF dst, memory src) %{
1534   predicate(UseSSE>=1);
1535   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1536 


2365   predicate(n->as_Vector()->length() == 2);
2366   match(Set dst (ReplicateD zero));
2367   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
2368   ins_encode %{
2369     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2370   %}
2371   ins_pipe( fpu_reg_reg );
2372 %}
2373 
2374 instruct Repl4D_zero(vecY dst, immD0 zero) %{
2375   predicate(n->as_Vector()->length() == 4);
2376   match(Set dst (ReplicateD zero));
2377   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
2378   ins_encode %{
2379     bool vector256 = true;
2380     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2381   %}
2382   ins_pipe( fpu_reg_reg );
2383 %}
2384 
































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 483   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 484 #else
 485   static address float_signmask()  { return (address)float_signmask_pool; }
 486   static address float_signflip()  { return (address)float_signflip_pool; }
 487   static address double_signmask() { return (address)double_signmask_pool; }
 488   static address double_signflip() { return (address)double_signflip_pool; }
 489 #endif
 490 
 491 // Map Types to machine register types
 492 const int Matcher::base2reg[Type::lastype] = {
 493   Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
 494   Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
 495   Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
 496   Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
 497   0, 0/*abio*/,
 498   Op_RegP /* Return address */, 0, /* the memories */
 499   Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
 500   0  /*bottom*/
 501 };
 502 
 503 const bool Matcher::match_rule_supported(int opcode) {
 504   if (!has_match_rule(opcode))
 505     return false;
 506 
 507   switch (opcode) {
 508     case Op_PopCountI:
 509     case Op_PopCountL:
 510       if (!UsePopCountInstruction)
 511         return false;
 512     case Op_MulVI:
 513       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
 514         return false;
 515     break;
 516   }
 517 
 518   return true;  // Per default match rules are supported.
 519 }
 520 
 521 // Max vector size in bytes. 0 if not supported.
 522 const int Matcher::vector_width_in_bytes(BasicType bt) {
 523   assert(is_java_primitive(bt), "only primitive type vectors");
 524   if (UseSSE < 2) return 0;
 525   // SSE2 supports 128bit vectors for all types.
 526   // AVX2 supports 256bit vectors for all types.
 527   int size = (UseAVX > 1) ? 32 : 16;
 528   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 529   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 530     size = 32;
 531   // Use flag to limit vector size.
 532   size = MIN2(size,(int)MaxVectorSize);
 533   // Minimum 2 values in vector (or 4 for bytes).
 534   switch (bt) {
 535   case T_DOUBLE:
 536   case T_LONG:
 537     if (size < 16) return 0;
 538   case T_FLOAT:
 539   case T_INT:
 540     if (size < 8) return 0;


1440   ins_pipe(pipe_slow);
1441 %}
1442 
1443 instruct absF_reg(regF dst) %{
1444   predicate((UseSSE>=1) && (UseAVX == 0));
1445   match(Set dst (AbsF dst));
1446   ins_cost(150);
1447   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
1448   ins_encode %{
1449     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1450   %}
1451   ins_pipe(pipe_slow);
1452 %}
1453 
1454 instruct absF_reg_reg(regF dst, regF src) %{
1455   predicate(UseAVX > 0);
1456   match(Set dst (AbsF src));
1457   ins_cost(150);
1458   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1459   ins_encode %{
1460     bool vector256 = false;
1461     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1462               ExternalAddress(float_signmask()), vector256);
1463   %}
1464   ins_pipe(pipe_slow);
1465 %}
1466 
1467 instruct absD_reg(regD dst) %{
1468   predicate((UseSSE>=2) && (UseAVX == 0));
1469   match(Set dst (AbsD dst));
1470   ins_cost(150);
1471   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
1472             "# abs double by sign masking" %}
1473   ins_encode %{
1474     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1475   %}
1476   ins_pipe(pipe_slow);
1477 %}
1478 
1479 instruct absD_reg_reg(regD dst, regD src) %{
1480   predicate(UseAVX > 0);
1481   match(Set dst (AbsD src));
1482   ins_cost(150);
1483   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
1484             "# abs double by sign masking" %}
1485   ins_encode %{
1486     bool vector256 = false;
1487     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1488               ExternalAddress(double_signmask()), vector256);
1489   %}
1490   ins_pipe(pipe_slow);
1491 %}
1492 
1493 instruct negF_reg(regF dst) %{
1494   predicate((UseSSE>=1) && (UseAVX == 0));
1495   match(Set dst (NegF dst));
1496   ins_cost(150);
1497   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
1498   ins_encode %{
1499     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
1500   %}
1501   ins_pipe(pipe_slow);
1502 %}
1503 
1504 instruct negF_reg_reg(regF dst, regF src) %{
1505   predicate(UseAVX > 0);
1506   match(Set dst (NegF src));
1507   ins_cost(150);
1508   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
1509   ins_encode %{
1510     bool vector256 = false;
1511     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
1512               ExternalAddress(float_signflip()), vector256);
1513   %}
1514   ins_pipe(pipe_slow);
1515 %}
1516 
1517 instruct negD_reg(regD dst) %{
1518   predicate((UseSSE>=2) && (UseAVX == 0));
1519   match(Set dst (NegD dst));
1520   ins_cost(150);
1521   format %{ "xorpd   $dst, [0x8000000000000000]\t"
1522             "# neg double by sign flipping" %}
1523   ins_encode %{
1524     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
1525   %}
1526   ins_pipe(pipe_slow);
1527 %}
1528 
1529 instruct negD_reg_reg(regD dst, regD src) %{
1530   predicate(UseAVX > 0);
1531   match(Set dst (NegD src));
1532   ins_cost(150);
1533   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
1534             "# neg double by sign flipping" %}
1535   ins_encode %{
1536     bool vector256 = false;
1537     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
1538               ExternalAddress(double_signflip()), vector256);
1539   %}
1540   ins_pipe(pipe_slow);
1541 %}
1542 
1543 instruct sqrtF_reg(regF dst, regF src) %{
1544   predicate(UseSSE>=1);
1545   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
1546 
1547   format %{ "sqrtss  $dst, $src" %}
1548   ins_cost(150);
1549   ins_encode %{
1550     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
1551   %}
1552   ins_pipe(pipe_slow);
1553 %}
1554 
1555 instruct sqrtF_mem(regF dst, memory src) %{
1556   predicate(UseSSE>=1);
1557   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1558 


2387   predicate(n->as_Vector()->length() == 2);
2388   match(Set dst (ReplicateD zero));
2389   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
2390   ins_encode %{
2391     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2392   %}
2393   ins_pipe( fpu_reg_reg );
2394 %}
2395 
2396 instruct Repl4D_zero(vecY dst, immD0 zero) %{
2397   predicate(n->as_Vector()->length() == 4);
2398   match(Set dst (ReplicateD zero));
2399   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
2400   ins_encode %{
2401     bool vector256 = true;
2402     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2403   %}
2404   ins_pipe( fpu_reg_reg );
2405 %}
2406 
2407 // ====================VECTOR ARITHMETIC=======================================
2408 
2409 // --------------------------------- ADD --------------------------------------
2410 
2411 // Bytes vector add
2412 instruct vadd4B(vecS dst, vecS src) %{
2413   predicate(n->as_Vector()->length() == 4);
2414   match(Set dst (AddVB dst src));
2415   format %{ "paddb   $dst,$src\t! add packed4B" %}
2416   ins_encode %{
2417     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2418   %}
2419   ins_pipe( pipe_slow );
2420 %}
2421 
2422 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
2423   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2424   match(Set dst (AddVB src1 src2));
2425   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
2426   ins_encode %{
2427     bool vector256 = false;
2428     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2429   %}
2430   ins_pipe( pipe_slow );
2431 %}
2432 
2433 instruct vadd8B(vecD dst, vecD src) %{
2434   predicate(n->as_Vector()->length() == 8);
2435   match(Set dst (AddVB dst src));
2436   format %{ "paddb   $dst,$src\t! add packed8B" %}
2437   ins_encode %{
2438     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2439   %}
2440   ins_pipe( pipe_slow );
2441 %}
2442 
2443 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
2444   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2445   match(Set dst (AddVB src1 src2));
2446   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
2447   ins_encode %{
2448     bool vector256 = false;
2449     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2450   %}
2451   ins_pipe( pipe_slow );
2452 %}
2453 
2454 instruct vadd16B(vecX dst, vecX src) %{
2455   predicate(n->as_Vector()->length() == 16);
2456   match(Set dst (AddVB dst src));
2457   format %{ "paddb   $dst,$src\t! add packed16B" %}
2458   ins_encode %{
2459     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2460   %}
2461   ins_pipe( pipe_slow );
2462 %}
2463 
2464 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
2465   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2466   match(Set dst (AddVB src1 src2));
2467   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
2468   ins_encode %{
2469     bool vector256 = false;
2470     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2471   %}
2472   ins_pipe( pipe_slow );
2473 %}
2474 
2475 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
2476   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2477   match(Set dst (AddVB src (LoadVector mem)));
2478   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
2479   ins_encode %{
2480     bool vector256 = false;
2481     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2482   %}
2483   ins_pipe( pipe_slow );
2484 %}
2485 
2486 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
2487   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2488   match(Set dst (AddVB src1 src2));
2489   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
2490   ins_encode %{
2491     bool vector256 = true;
2492     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2493   %}
2494   ins_pipe( pipe_slow );
2495 %}
2496 
2497 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
2498   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2499   match(Set dst (AddVB src (LoadVector mem)));
2500   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
2501   ins_encode %{
2502     bool vector256 = true;
2503     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2504   %}
2505   ins_pipe( pipe_slow );
2506 %}
2507 
2508 // Shorts/Chars vector add
2509 instruct vadd2S(vecS dst, vecS src) %{
2510   predicate(n->as_Vector()->length() == 2);
2511   match(Set dst (AddVS dst src));
2512   format %{ "paddw   $dst,$src\t! add packed2S" %}
2513   ins_encode %{
2514     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2515   %}
2516   ins_pipe( pipe_slow );
2517 %}
2518 
2519 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
2520   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2521   match(Set dst (AddVS src1 src2));
2522   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
2523   ins_encode %{
2524     bool vector256 = false;
2525     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2526   %}
2527   ins_pipe( pipe_slow );
2528 %}
2529 
2530 instruct vadd4S(vecD dst, vecD src) %{
2531   predicate(n->as_Vector()->length() == 4);
2532   match(Set dst (AddVS dst src));
2533   format %{ "paddw   $dst,$src\t! add packed4S" %}
2534   ins_encode %{
2535     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2536   %}
2537   ins_pipe( pipe_slow );
2538 %}
2539 
2540 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
2541   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2542   match(Set dst (AddVS src1 src2));
2543   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
2544   ins_encode %{
2545     bool vector256 = false;
2546     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2547   %}
2548   ins_pipe( pipe_slow );
2549 %}
2550 
2551 instruct vadd8S(vecX dst, vecX src) %{
2552   predicate(n->as_Vector()->length() == 8);
2553   match(Set dst (AddVS dst src));
2554   format %{ "paddw   $dst,$src\t! add packed8S" %}
2555   ins_encode %{
2556     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2557   %}
2558   ins_pipe( pipe_slow );
2559 %}
2560 
2561 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
2562   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2563   match(Set dst (AddVS src1 src2));
2564   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
2565   ins_encode %{
2566     bool vector256 = false;
2567     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2568   %}
2569   ins_pipe( pipe_slow );
2570 %}
2571 
2572 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
2573   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2574   match(Set dst (AddVS src (LoadVector mem)));
2575   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
2576   ins_encode %{
2577     bool vector256 = false;
2578     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2579   %}
2580   ins_pipe( pipe_slow );
2581 %}
2582 
2583 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
2584   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2585   match(Set dst (AddVS src1 src2));
2586   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
2587   ins_encode %{
2588     bool vector256 = true;
2589     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2590   %}
2591   ins_pipe( pipe_slow );
2592 %}
2593 
2594 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
2595   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2596   match(Set dst (AddVS src (LoadVector mem)));
2597   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
2598   ins_encode %{
2599     bool vector256 = true;
2600     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2601   %}
2602   ins_pipe( pipe_slow );
2603 %}
2604 
2605 // Integers vector add
2606 instruct vadd2I(vecD dst, vecD src) %{
2607   predicate(n->as_Vector()->length() == 2);
2608   match(Set dst (AddVI dst src));
2609   format %{ "paddd   $dst,$src\t! add packed2I" %}
2610   ins_encode %{
2611     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2612   %}
2613   ins_pipe( pipe_slow );
2614 %}
2615 
2616 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
2617   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2618   match(Set dst (AddVI src1 src2));
2619   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
2620   ins_encode %{
2621     bool vector256 = false;
2622     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2623   %}
2624   ins_pipe( pipe_slow );
2625 %}
2626 
2627 instruct vadd4I(vecX dst, vecX src) %{
2628   predicate(n->as_Vector()->length() == 4);
2629   match(Set dst (AddVI dst src));
2630   format %{ "paddd   $dst,$src\t! add packed4I" %}
2631   ins_encode %{
2632     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2633   %}
2634   ins_pipe( pipe_slow );
2635 %}
2636 
2637 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
2638   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2639   match(Set dst (AddVI src1 src2));
2640   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
2641   ins_encode %{
2642     bool vector256 = false;
2643     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2644   %}
2645   ins_pipe( pipe_slow );
2646 %}
2647 
2648 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
2649   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2650   match(Set dst (AddVI src (LoadVector mem)));
2651   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
2652   ins_encode %{
2653     bool vector256 = false;
2654     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2655   %}
2656   ins_pipe( pipe_slow );
2657 %}
2658 
2659 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
2660   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2661   match(Set dst (AddVI src1 src2));
2662   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
2663   ins_encode %{
2664     bool vector256 = true;
2665     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2666   %}
2667   ins_pipe( pipe_slow );
2668 %}
2669 
2670 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
2671   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2672   match(Set dst (AddVI src (LoadVector mem)));
2673   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
2674   ins_encode %{
2675     bool vector256 = true;
2676     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2677   %}
2678   ins_pipe( pipe_slow );
2679 %}
2680 
2681 // Longs vector add
2682 instruct vadd2L(vecX dst, vecX src) %{
2683   predicate(n->as_Vector()->length() == 2);
2684   match(Set dst (AddVL dst src));
2685   format %{ "paddq   $dst,$src\t! add packed2L" %}
2686   ins_encode %{
2687     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
2688   %}
2689   ins_pipe( pipe_slow );
2690 %}
2691 
2692 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
2693   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2694   match(Set dst (AddVL src1 src2));
2695   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
2696   ins_encode %{
2697     bool vector256 = false;
2698     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2699   %}
2700   ins_pipe( pipe_slow );
2701 %}
2702 
2703 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
2704   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2705   match(Set dst (AddVL src (LoadVector mem)));
2706   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
2707   ins_encode %{
2708     bool vector256 = false;
2709     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2710   %}
2711   ins_pipe( pipe_slow );
2712 %}
2713 
2714 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
2715   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2716   match(Set dst (AddVL src1 src2));
2717   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
2718   ins_encode %{
2719     bool vector256 = true;
2720     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2721   %}
2722   ins_pipe( pipe_slow );
2723 %}
2724 
2725 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
2726   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2727   match(Set dst (AddVL src (LoadVector mem)));
2728   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
2729   ins_encode %{
2730     bool vector256 = true;
2731     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2732   %}
2733   ins_pipe( pipe_slow );
2734 %}
2735 
2736 // Floats vector add
2737 instruct vadd2F(vecD dst, vecD src) %{
2738   predicate(n->as_Vector()->length() == 2);
2739   match(Set dst (AddVF dst src));
2740   format %{ "addps   $dst,$src\t! add packed2F" %}
2741   ins_encode %{
2742     __ addps($dst$$XMMRegister, $src$$XMMRegister);
2743   %}
2744   ins_pipe( pipe_slow );
2745 %}
2746 
2747 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
2748   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2749   match(Set dst (AddVF src1 src2));
2750   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
2751   ins_encode %{
2752     bool vector256 = false;
2753     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2754   %}
2755   ins_pipe( pipe_slow );
2756 %}
2757 
2758 instruct vadd4F(vecX dst, vecX src) %{
2759   predicate(n->as_Vector()->length() == 4);
2760   match(Set dst (AddVF dst src));
2761   format %{ "addps   $dst,$src\t! add packed4F" %}
2762   ins_encode %{
2763     __ addps($dst$$XMMRegister, $src$$XMMRegister);
2764   %}
2765   ins_pipe( pipe_slow );
2766 %}
2767 
2768 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
2769   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2770   match(Set dst (AddVF src1 src2));
2771   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
2772   ins_encode %{
2773     bool vector256 = false;
2774     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2775   %}
2776   ins_pipe( pipe_slow );
2777 %}
2778 
2779 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
2780   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2781   match(Set dst (AddVF src (LoadVector mem)));
2782   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
2783   ins_encode %{
2784     bool vector256 = false;
2785     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2786   %}
2787   ins_pipe( pipe_slow );
2788 %}
2789 
2790 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
2791   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2792   match(Set dst (AddVF src1 src2));
2793   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
2794   ins_encode %{
2795     bool vector256 = true;
2796     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2797   %}
2798   ins_pipe( pipe_slow );
2799 %}
2800 
2801 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
2802   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2803   match(Set dst (AddVF src (LoadVector mem)));
2804   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
2805   ins_encode %{
2806     bool vector256 = true;
2807     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2808   %}
2809   ins_pipe( pipe_slow );
2810 %}
2811 
2812 // Doubles vector add
// vadd2D: legacy SSE2 two-operand form, works in place (dst += src),
// hence the 'match' requires dst as the first AddVD input.
2813 instruct vadd2D(vecX dst, vecX src) %{
2814   predicate(n->as_Vector()->length() == 2);
2815   match(Set dst (AddVD dst src));
2816   format %{ "addpd   $dst,$src\t! add packed2D" %}
2817   ins_encode %{
2818     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
2819   %}
2820   ins_pipe( pipe_slow );
2821 %}
2822 
// AVX three-operand 128-bit form (vector256 = false).
2823 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
2824   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2825   match(Set dst (AddVD src1 src2));
2826   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
2827   ins_encode %{
2828     bool vector256 = false;
2829     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2830   %}
2831   ins_pipe( pipe_slow );
2832 %}
2833 
// AVX 128-bit form with the second operand loaded from memory.
2834 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
2835   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2836   match(Set dst (AddVD src (LoadVector mem)));
2837   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
2838   ins_encode %{
2839     bool vector256 = false;
2840     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2841   %}
2842   ins_pipe( pipe_slow );
2843 %}
2844 
// 256-bit (4 x double) forms; FP needs only AVX1 (UseAVX > 0).
2845 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
2846   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2847   match(Set dst (AddVD src1 src2));
2848   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
2849   ins_encode %{
2850     bool vector256 = true;
2851     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2852   %}
2853   ins_pipe( pipe_slow );
2854 %}
2855 
2856 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
2857   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2858   match(Set dst (AddVD src (LoadVector mem)));
2859   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
2860   ins_encode %{
2861     bool vector256 = true;
2862     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2863   %}
2864   ins_pipe( pipe_slow );
2865 %}
2866 
2867 // --------------------------------- SUB --------------------------------------
2868 
2869 // Bytes vector sub
// Pattern used throughout: the plain instruct is the in-place legacy SSE
// encoding (dst -= src); *_reg/_mem are the three-operand AVX (VEX) forms.
// vector256 selects 128-bit (false) vs 256-bit (true) operation width.
// 256-bit integer ops require AVX2 (UseAVX > 1), unlike the FP ops above.
2870 instruct vsub4B(vecS dst, vecS src) %{
2871   predicate(n->as_Vector()->length() == 4);
2872   match(Set dst (SubVB dst src));
2873   format %{ "psubb   $dst,$src\t! sub packed4B" %}
2874   ins_encode %{
2875     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2876   %}
2877   ins_pipe( pipe_slow );
2878 %}
2879 
2880 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
2881   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2882   match(Set dst (SubVB src1 src2));
2883   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
2884   ins_encode %{
2885     bool vector256 = false;
2886     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2887   %}
2888   ins_pipe( pipe_slow );
2889 %}
2890 
2891 instruct vsub8B(vecD dst, vecD src) %{
2892   predicate(n->as_Vector()->length() == 8);
2893   match(Set dst (SubVB dst src));
2894   format %{ "psubb   $dst,$src\t! sub packed8B" %}
2895   ins_encode %{
2896     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2897   %}
2898   ins_pipe( pipe_slow );
2899 %}
2900 
2901 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
2902   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2903   match(Set dst (SubVB src1 src2));
2904   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
2905   ins_encode %{
2906     bool vector256 = false;
2907     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2908   %}
2909   ins_pipe( pipe_slow );
2910 %}
2911 
2912 instruct vsub16B(vecX dst, vecX src) %{
2913   predicate(n->as_Vector()->length() == 16);
2914   match(Set dst (SubVB dst src));
2915   format %{ "psubb   $dst,$src\t! sub packed16B" %}
2916   ins_encode %{
2917     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2918   %}
2919   ins_pipe( pipe_slow );
2920 %}
2921 
2922 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
2923   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2924   match(Set dst (SubVB src1 src2));
2925   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
2926   ins_encode %{
2927     bool vector256 = false;
2928     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2929   %}
2930   ins_pipe( pipe_slow );
2931 %}
2932 
// Memory-operand forms exist only from the full 128-bit (16B) size up;
// smaller vectors (4B/8B) would load past the operand in memory.
2933 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
2934   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2935   match(Set dst (SubVB src (LoadVector mem)));
2936   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
2937   ins_encode %{
2938     bool vector256 = false;
2939     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2940   %}
2941   ins_pipe( pipe_slow );
2942 %}
2943 
2944 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
2945   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2946   match(Set dst (SubVB src1 src2));
2947   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
2948   ins_encode %{
2949     bool vector256 = true;
2950     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2951   %}
2952   ins_pipe( pipe_slow );
2953 %}
2954 
2955 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
2956   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2957   match(Set dst (SubVB src (LoadVector mem)));
2958   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
2959   ins_encode %{
2960     bool vector256 = true;
2961     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2962   %}
2963   ins_pipe( pipe_slow );
2964 %}
2965 
2966 // Shorts/Chars vector sub
// Same scheme as the byte forms: in-place SSE psubw, then three-operand
// AVX vpsubw with vector256 selecting 128/256-bit width; the 16S (vecY)
// forms need AVX2 (UseAVX > 1).
2967 instruct vsub2S(vecS dst, vecS src) %{
2968   predicate(n->as_Vector()->length() == 2);
2969   match(Set dst (SubVS dst src));
2970   format %{ "psubw   $dst,$src\t! sub packed2S" %}
2971   ins_encode %{
2972     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
2973   %}
2974   ins_pipe( pipe_slow );
2975 %}
2976 
2977 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
2978   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2979   match(Set dst (SubVS src1 src2));
2980   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
2981   ins_encode %{
2982     bool vector256 = false;
2983     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2984   %}
2985   ins_pipe( pipe_slow );
2986 %}
2987 
2988 instruct vsub4S(vecD dst, vecD src) %{
2989   predicate(n->as_Vector()->length() == 4);
2990   match(Set dst (SubVS dst src));
2991   format %{ "psubw   $dst,$src\t! sub packed4S" %}
2992   ins_encode %{
2993     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
2994   %}
2995   ins_pipe( pipe_slow );
2996 %}
2997 
2998 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
2999   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3000   match(Set dst (SubVS src1 src2));
3001   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
3002   ins_encode %{
3003     bool vector256 = false;
3004     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3005   %}
3006   ins_pipe( pipe_slow );
3007 %}
3008 
3009 instruct vsub8S(vecX dst, vecX src) %{
3010   predicate(n->as_Vector()->length() == 8);
3011   match(Set dst (SubVS dst src));
3012   format %{ "psubw   $dst,$src\t! sub packed8S" %}
3013   ins_encode %{
3014     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3015   %}
3016   ins_pipe( pipe_slow );
3017 %}
3018 
3019 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
3020   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3021   match(Set dst (SubVS src1 src2));
3022   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
3023   ins_encode %{
3024     bool vector256 = false;
3025     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3026   %}
3027   ins_pipe( pipe_slow );
3028 %}
3029 
3030 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
3031   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3032   match(Set dst (SubVS src (LoadVector mem)));
3033   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
3034   ins_encode %{
3035     bool vector256 = false;
3036     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3037   %}
3038   ins_pipe( pipe_slow );
3039 %}
3040 
3041 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
3042   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3043   match(Set dst (SubVS src1 src2));
3044   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
3045   ins_encode %{
3046     bool vector256 = true;
3047     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3048   %}
3049   ins_pipe( pipe_slow );
3050 %}
3051 
3052 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
3053   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3054   match(Set dst (SubVS src (LoadVector mem)));
3055   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
3056   ins_encode %{
3057     bool vector256 = true;
3058     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3059   %}
3060   ins_pipe( pipe_slow );
3061 %}
3062 
3063 // Integers vector sub
// In-place SSE psubd, then three-operand AVX vpsubd; the 8I (vecY, 256-bit)
// forms require AVX2 (UseAVX > 1).
3064 instruct vsub2I(vecD dst, vecD src) %{
3065   predicate(n->as_Vector()->length() == 2);
3066   match(Set dst (SubVI dst src));
3067   format %{ "psubd   $dst,$src\t! sub packed2I" %}
3068   ins_encode %{
3069     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3070   %}
3071   ins_pipe( pipe_slow );
3072 %}
3073 
3074 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
3075   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3076   match(Set dst (SubVI src1 src2));
3077   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
3078   ins_encode %{
3079     bool vector256 = false;
3080     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3081   %}
3082   ins_pipe( pipe_slow );
3083 %}
3084 
3085 instruct vsub4I(vecX dst, vecX src) %{
3086   predicate(n->as_Vector()->length() == 4);
3087   match(Set dst (SubVI dst src));
3088   format %{ "psubd   $dst,$src\t! sub packed4I" %}
3089   ins_encode %{
3090     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3091   %}
3092   ins_pipe( pipe_slow );
3093 %}
3094 
3095 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
3096   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3097   match(Set dst (SubVI src1 src2));
3098   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
3099   ins_encode %{
3100     bool vector256 = false;
3101     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3102   %}
3103   ins_pipe( pipe_slow );
3104 %}
3105 
3106 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
3107   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3108   match(Set dst (SubVI src (LoadVector mem)));
3109   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
3110   ins_encode %{
3111     bool vector256 = false;
3112     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3113   %}
3114   ins_pipe( pipe_slow );
3115 %}
3116 
3117 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
3118   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3119   match(Set dst (SubVI src1 src2));
3120   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
3121   ins_encode %{
3122     bool vector256 = true;
3123     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3124   %}
3125   ins_pipe( pipe_slow );
3126 %}
3127 
3128 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
3129   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3130   match(Set dst (SubVI src (LoadVector mem)));
3131   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
3132   ins_encode %{
3133     bool vector256 = true;
3134     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3135   %}
3136   ins_pipe( pipe_slow );
3137 %}
3138 
3139 // Longs vector sub
// Smallest long vector is 2L (128-bit, vecX); the 4L (vecY) forms
// require AVX2 (UseAVX > 1).
3140 instruct vsub2L(vecX dst, vecX src) %{
3141   predicate(n->as_Vector()->length() == 2);
3142   match(Set dst (SubVL dst src));
3143   format %{ "psubq   $dst,$src\t! sub packed2L" %}
3144   ins_encode %{
3145     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
3146   %}
3147   ins_pipe( pipe_slow );
3148 %}
3149 
3150 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
3151   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3152   match(Set dst (SubVL src1 src2));
3153   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
3154   ins_encode %{
3155     bool vector256 = false;
3156     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3157   %}
3158   ins_pipe( pipe_slow );
3159 %}
3160 
3161 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
3162   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3163   match(Set dst (SubVL src (LoadVector mem)));
3164   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
3165   ins_encode %{
3166     bool vector256 = false;
3167     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3168   %}
3169   ins_pipe( pipe_slow );
3170 %}
3171 
3172 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
3173   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3174   match(Set dst (SubVL src1 src2));
3175   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
3176   ins_encode %{
3177     bool vector256 = true;
3178     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3179   %}
3180   ins_pipe( pipe_slow );
3181 %}
3182 
3183 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
3184   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3185   match(Set dst (SubVL src (LoadVector mem)));
3186   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
3187   ins_encode %{
3188     bool vector256 = true;
3189     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3190   %}
3191   ins_pipe( pipe_slow );
3192 %}
3193 
3194 // Floats vector sub
// FP subtraction: in-place SSE subps, then three-operand AVX vsubps.
// The 256-bit (8F) forms only need AVX1 (UseAVX > 0), unlike the
// integer ops above which need AVX2 for 256-bit width.
3195 instruct vsub2F(vecD dst, vecD src) %{
3196   predicate(n->as_Vector()->length() == 2);
3197   match(Set dst (SubVF dst src));
3198   format %{ "subps   $dst,$src\t! sub packed2F" %}
3199   ins_encode %{
3200     __ subps($dst$$XMMRegister, $src$$XMMRegister);
3201   %}
3202   ins_pipe( pipe_slow );
3203 %}
3204 
3205 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
3206   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3207   match(Set dst (SubVF src1 src2));
3208   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
3209   ins_encode %{
3210     bool vector256 = false;
3211     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3212   %}
3213   ins_pipe( pipe_slow );
3214 %}
3215 
3216 instruct vsub4F(vecX dst, vecX src) %{
3217   predicate(n->as_Vector()->length() == 4);
3218   match(Set dst (SubVF dst src));
3219   format %{ "subps   $dst,$src\t! sub packed4F" %}
3220   ins_encode %{
3221     __ subps($dst$$XMMRegister, $src$$XMMRegister);
3222   %}
3223   ins_pipe( pipe_slow );
3224 %}
3225 
3226 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
3227   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3228   match(Set dst (SubVF src1 src2));
3229   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
3230   ins_encode %{
3231     bool vector256 = false;
3232     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3233   %}
3234   ins_pipe( pipe_slow );
3235 %}
3236 
3237 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
3238   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3239   match(Set dst (SubVF src (LoadVector mem)));
3240   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
3241   ins_encode %{
3242     bool vector256 = false;
3243     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3244   %}
3245   ins_pipe( pipe_slow );
3246 %}
3247 
3248 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
3249   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3250   match(Set dst (SubVF src1 src2));
3251   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
3252   ins_encode %{
3253     bool vector256 = true;
3254     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3255   %}
3256   ins_pipe( pipe_slow );
3257 %}
3258 
3259 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
3260   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3261   match(Set dst (SubVF src (LoadVector mem)));
3262   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
3263   ins_encode %{
3264     bool vector256 = true;
3265     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3266   %}
3267   ins_pipe( pipe_slow );
3268 %}
3269 
3270 // Doubles vector sub
// In-place SSE2 subpd, then three-operand AVX vsubpd; 256-bit (4D)
// forms need only AVX1 (UseAVX > 0).
3271 instruct vsub2D(vecX dst, vecX src) %{
3272   predicate(n->as_Vector()->length() == 2);
3273   match(Set dst (SubVD dst src));
3274   format %{ "subpd   $dst,$src\t! sub packed2D" %}
3275   ins_encode %{
3276     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
3277   %}
3278   ins_pipe( pipe_slow );
3279 %}
3280 
3281 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
3282   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3283   match(Set dst (SubVD src1 src2));
3284   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
3285   ins_encode %{
3286     bool vector256 = false;
3287     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3288   %}
3289   ins_pipe( pipe_slow );
3290 %}
3291 
3292 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
3293   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3294   match(Set dst (SubVD src (LoadVector mem)));
3295   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
3296   ins_encode %{
3297     bool vector256 = false;
3298     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3299   %}
3300   ins_pipe( pipe_slow );
3301 %}
3302 
3303 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
3304   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3305   match(Set dst (SubVD src1 src2));
3306   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
3307   ins_encode %{
3308     bool vector256 = true;
3309     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3310   %}
3311   ins_pipe( pipe_slow );
3312 %}
3313 
3314 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
3315   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3316   match(Set dst (SubVD src (LoadVector mem)));
3317   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
3318   ins_encode %{
3319     bool vector256 = true;
3320     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3321   %}
3322   ins_pipe( pipe_slow );
3323 %}
3324 
3325 // --------------------------------- MUL --------------------------------------
3326 
3327 // Shorts/Chars vector mul
// pmullw/vpmullw keep the low 16 bits of each product, which is the
// correct Java semantics for short/char multiply. 16S (vecY) forms
// need AVX2 (UseAVX > 1).
// NOTE(review): the format string below has one space after "pmullw"
// while the siblings use two — cosmetic disassembly-text inconsistency,
// left as-is since format text is emitted output.
3328 instruct vmul2S(vecS dst, vecS src) %{
3329   predicate(n->as_Vector()->length() == 2);
3330   match(Set dst (MulVS dst src));
3331   format %{ "pmullw $dst,$src\t! mul packed2S" %}
3332   ins_encode %{
3333     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3334   %}
3335   ins_pipe( pipe_slow );
3336 %}
3337 
3338 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
3339   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3340   match(Set dst (MulVS src1 src2));
3341   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
3342   ins_encode %{
3343     bool vector256 = false;
3344     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3345   %}
3346   ins_pipe( pipe_slow );
3347 %}
3348 
3349 instruct vmul4S(vecD dst, vecD src) %{
3350   predicate(n->as_Vector()->length() == 4);
3351   match(Set dst (MulVS dst src));
3352   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
3353   ins_encode %{
3354     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3355   %}
3356   ins_pipe( pipe_slow );
3357 %}
3358 
3359 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
3360   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3361   match(Set dst (MulVS src1 src2));
3362   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
3363   ins_encode %{
3364     bool vector256 = false;
3365     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3366   %}
3367   ins_pipe( pipe_slow );
3368 %}
3369 
3370 instruct vmul8S(vecX dst, vecX src) %{
3371   predicate(n->as_Vector()->length() == 8);
3372   match(Set dst (MulVS dst src));
3373   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
3374   ins_encode %{
3375     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3376   %}
3377   ins_pipe( pipe_slow );
3378 %}
3379 
3380 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
3381   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3382   match(Set dst (MulVS src1 src2));
3383   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
3384   ins_encode %{
3385     bool vector256 = false;
3386     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3387   %}
3388   ins_pipe( pipe_slow );
3389 %}
3390 
3391 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
3392   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3393   match(Set dst (MulVS src (LoadVector mem)));
3394   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
3395   ins_encode %{
3396     bool vector256 = false;
3397     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3398   %}
3399   ins_pipe( pipe_slow );
3400 %}
3401 
3402 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
3403   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3404   match(Set dst (MulVS src1 src2));
3405   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
3406   ins_encode %{
3407     bool vector256 = true;
3408     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3409   %}
3410   ins_pipe( pipe_slow );
3411 %}
3412 
3413 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
3414   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3415   match(Set dst (MulVS src (LoadVector mem)));
3416   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
3417   ins_encode %{
3418     bool vector256 = true;
3419     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3420   %}
3421   ins_pipe( pipe_slow );
3422 %}
3423 
3424 // Integers vector mul (sse4_1)
// pmulld (packed 32-bit low multiply) is an SSE4.1 instruction, hence
// the UseSSE > 3 gate on the non-AVX forms — there is no SSE2 fallback
// here. The 8I (vecY) forms need AVX2 (UseAVX > 1).
3425 instruct vmul2I(vecD dst, vecD src) %{
3426   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
3427   match(Set dst (MulVI dst src));
3428   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
3429   ins_encode %{
3430     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3431   %}
3432   ins_pipe( pipe_slow );
3433 %}
3434 
3435 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
3436   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3437   match(Set dst (MulVI src1 src2));
3438   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
3439   ins_encode %{
3440     bool vector256 = false;
3441     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3442   %}
3443   ins_pipe( pipe_slow );
3444 %}
3445 
3446 instruct vmul4I(vecX dst, vecX src) %{
3447   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
3448   match(Set dst (MulVI dst src));
3449   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
3450   ins_encode %{
3451     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3452   %}
3453   ins_pipe( pipe_slow );
3454 %}
3455 
3456 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
3457   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3458   match(Set dst (MulVI src1 src2));
3459   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
3460   ins_encode %{
3461     bool vector256 = false;
3462     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3463   %}
3464   ins_pipe( pipe_slow );
3465 %}
3466 
3467 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
3468   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3469   match(Set dst (MulVI src (LoadVector mem)));
3470   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
3471   ins_encode %{
3472     bool vector256 = false;
3473     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3474   %}
3475   ins_pipe( pipe_slow );
3476 %}
3477 
3478 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
3479   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3480   match(Set dst (MulVI src1 src2));
3481   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
3482   ins_encode %{
3483     bool vector256 = true;
3484     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3485   %}
3486   ins_pipe( pipe_slow );
3487 %}
3488 
3489 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
3490   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3491   match(Set dst (MulVI src (LoadVector mem)));
3492   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
3493   ins_encode %{
3494     bool vector256 = true;
3495     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3496   %}
3497   ins_pipe( pipe_slow );
3498 %}
3499 
3500 // Floats vector mul
// In-place SSE mulps, then three-operand AVX vmulps; 256-bit (8F)
// forms need only AVX1 (UseAVX > 0).
3501 instruct vmul2F(vecD dst, vecD src) %{
3502   predicate(n->as_Vector()->length() == 2);
3503   match(Set dst (MulVF dst src));
3504   format %{ "mulps   $dst,$src\t! mul packed2F" %}
3505   ins_encode %{
3506     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3507   %}
3508   ins_pipe( pipe_slow );
3509 %}
3510 
3511 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
3512   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3513   match(Set dst (MulVF src1 src2));
3514   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
3515   ins_encode %{
3516     bool vector256 = false;
3517     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3518   %}
3519   ins_pipe( pipe_slow );
3520 %}
3521 
3522 instruct vmul4F(vecX dst, vecX src) %{
3523   predicate(n->as_Vector()->length() == 4);
3524   match(Set dst (MulVF dst src));
3525   format %{ "mulps   $dst,$src\t! mul packed4F" %}
3526   ins_encode %{
3527     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3528   %}
3529   ins_pipe( pipe_slow );
3530 %}
3531 
3532 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
3533   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3534   match(Set dst (MulVF src1 src2));
3535   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
3536   ins_encode %{
3537     bool vector256 = false;
3538     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3539   %}
3540   ins_pipe( pipe_slow );
3541 %}
3542 
3543 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
3544   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3545   match(Set dst (MulVF src (LoadVector mem)));
3546   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
3547   ins_encode %{
3548     bool vector256 = false;
3549     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3550   %}
3551   ins_pipe( pipe_slow );
3552 %}
3553 
3554 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
3555   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3556   match(Set dst (MulVF src1 src2));
3557   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
3558   ins_encode %{
3559     bool vector256 = true;
3560     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3561   %}
3562   ins_pipe( pipe_slow );
3563 %}
3564 
3565 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
3566   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3567   match(Set dst (MulVF src (LoadVector mem)));
3568   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
3569   ins_encode %{
3570     bool vector256 = true;
3571     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3572   %}
3573   ins_pipe( pipe_slow );
3574 %}
3575 
3576 // Doubles vector mul
// In-place SSE2 mulpd, then three-operand AVX vmulpd; 256-bit (4D)
// forms need only AVX1 (UseAVX > 0).
3577 instruct vmul2D(vecX dst, vecX src) %{
3578   predicate(n->as_Vector()->length() == 2);
3579   match(Set dst (MulVD dst src));
3580   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
3581   ins_encode %{
3582     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
3583   %}
3584   ins_pipe( pipe_slow );
3585 %}
3586 
3587 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
3588   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3589   match(Set dst (MulVD src1 src2));
3590   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
3591   ins_encode %{
3592     bool vector256 = false;
3593     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3594   %}
3595   ins_pipe( pipe_slow );
3596 %}
3597 
3598 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
3599   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3600   match(Set dst (MulVD src (LoadVector mem)));
3601   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
3602   ins_encode %{
3603     bool vector256 = false;
3604     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3605   %}
3606   ins_pipe( pipe_slow );
3607 %}
3608 
3609 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
3610   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3611   match(Set dst (MulVD src1 src2));
3612   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
3613   ins_encode %{
3614     bool vector256 = true;
3615     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3616   %}
3617   ins_pipe( pipe_slow );
3618 %}
3619 
3620 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
3621   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3622   match(Set dst (MulVD src (LoadVector mem)));
3623   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
3624   ins_encode %{
3625     bool vector256 = true;
3626     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3627   %}
3628   ins_pipe( pipe_slow );
3629 %}
3630 
3631 // --------------------------------- DIV --------------------------------------
3632 
3633 // Floats vector div
// Division is FP-only (no packed integer divide on x86): in-place SSE
// divps, then three-operand AVX vdivps. 256-bit (8F) forms need only
// AVX1 (UseAVX > 0).
3634 instruct vdiv2F(vecD dst, vecD src) %{
3635   predicate(n->as_Vector()->length() == 2);
3636   match(Set dst (DivVF dst src));
3637   format %{ "divps   $dst,$src\t! div packed2F" %}
3638   ins_encode %{
3639     __ divps($dst$$XMMRegister, $src$$XMMRegister);
3640   %}
3641   ins_pipe( pipe_slow );
3642 %}
3643 
3644 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
3645   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3646   match(Set dst (DivVF src1 src2));
3647   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
3648   ins_encode %{
3649     bool vector256 = false;
3650     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3651   %}
3652   ins_pipe( pipe_slow );
3653 %}
3654 
3655 instruct vdiv4F(vecX dst, vecX src) %{
3656   predicate(n->as_Vector()->length() == 4);
3657   match(Set dst (DivVF dst src));
3658   format %{ "divps   $dst,$src\t! div packed4F" %}
3659   ins_encode %{
3660     __ divps($dst$$XMMRegister, $src$$XMMRegister);
3661   %}
3662   ins_pipe( pipe_slow );
3663 %}
3664 
3665 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
3666   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3667   match(Set dst (DivVF src1 src2));
3668   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
3669   ins_encode %{
3670     bool vector256 = false;
3671     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3672   %}
3673   ins_pipe( pipe_slow );
3674 %}
3675 
3676 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
3677   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3678   match(Set dst (DivVF src (LoadVector mem)));
3679   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
3680   ins_encode %{
3681     bool vector256 = false;
3682     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3683   %}
3684   ins_pipe( pipe_slow );
3685 %}
3686 
3687 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
3688   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3689   match(Set dst (DivVF src1 src2));
3690   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
3691   ins_encode %{
3692     bool vector256 = true;
3693     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3694   %}
3695   ins_pipe( pipe_slow );
3696 %}
3697 
3698 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
3699   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3700   match(Set dst (DivVF src (LoadVector mem)));
3701   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
3702   ins_encode %{
3703     bool vector256 = true;
3704     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3705   %}
3706   ins_pipe( pipe_slow );
3707 %}
3708 
3709 // Doubles vector div
// Packed double divide. vdiv2D is the legacy SSE2 two-operand form and is
// destructive (matches "DivVD dst src", so dst is both input and result);
// the v* forms are AVX three-operand, non-destructive, with _mem variants
// folding the divisor load. vector256 selects 128-bit (2 doubles) vs
// 256-bit (4 doubles) VEX encoding.
3710 instruct vdiv2D(vecX dst, vecX src) %{
3711   predicate(n->as_Vector()->length() == 2);
3712   match(Set dst (DivVD dst src));
3713   format %{ "divpd   $dst,$src\t! div packed2D" %}
3714   ins_encode %{
3715     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
3716   %}
3717   ins_pipe( pipe_slow );
3718 %}
3719 
3720 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
3721   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3722   match(Set dst (DivVD src1 src2));
3723   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
3724   ins_encode %{
3725     bool vector256 = false;
3726     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3727   %}
3728   ins_pipe( pipe_slow );
3729 %}
3730 
3731 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
3732   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3733   match(Set dst (DivVD src (LoadVector mem)));
3734   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
3735   ins_encode %{
3736     bool vector256 = false;
3737     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3738   %}
3739   ins_pipe( pipe_slow );
3740 %}
3741 
3742 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
3743   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3744   match(Set dst (DivVD src1 src2));
3745   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
3746   ins_encode %{
3747     bool vector256 = true;
3748     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3749   %}
3750   ins_pipe( pipe_slow );
3751 %}
3752 
3753 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
3754   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3755   match(Set dst (DivVD src (LoadVector mem)));
3756   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
3757   ins_encode %{
3758     bool vector256 = true;
3759     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3760   %}
3761   ins_pipe( pipe_slow );
3762 %}
3763 
3764 // ------------------------------ LeftShift -----------------------------------
3765 
3766 // Shorts/Chars vector left shift
// Left shift of packed shorts/chars (psllw/vpsllw), for 2/4/8/16 elements.
// Each element count has up to four variants:
//   vsllNS          - legacy SSE, destructive (dst <<= shift), shift count in an
//                     XMM register (the regF operand is read as $shift$$XMMRegister)
//   vsllNS_imm      - legacy SSE, destructive, 8-bit immediate shift count
//   vsllNS_reg      - AVX three-operand, non-destructive, XMM shift count
//   vsllNS_reg_imm  - AVX three-operand, non-destructive, immediate shift count
// The 16S forms require UseAVX > 1 (256-bit integer ops, i.e. AVX2 — see
// vector_width_in_bytes above) and set vector256 = true.
3767 instruct vsll2S(vecS dst, regF shift) %{
3768   predicate(n->as_Vector()->length() == 2);
3769   match(Set dst (LShiftVS dst shift));
3770   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
3771   ins_encode %{
3772     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3773   %}
3774   ins_pipe( pipe_slow );
3775 %}
3776 
3777 instruct vsll2S_imm(vecS dst, immI8 shift) %{
3778   predicate(n->as_Vector()->length() == 2);
3779   match(Set dst (LShiftVS dst shift));
3780   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
3781   ins_encode %{
3782     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3783   %}
3784   ins_pipe( pipe_slow );
3785 %}
3786 
3787 instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
3788   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3789   match(Set dst (LShiftVS src shift));
3790   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
3791   ins_encode %{
3792     bool vector256 = false;
3793     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3794   %}
3795   ins_pipe( pipe_slow );
3796 %}
3797 
3798 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
3799   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3800   match(Set dst (LShiftVS src shift));
3801   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
3802   ins_encode %{
3803     bool vector256 = false;
3804     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3805   %}
3806   ins_pipe( pipe_slow );
3807 %}
3808 
3809 instruct vsll4S(vecD dst, regF shift) %{
3810   predicate(n->as_Vector()->length() == 4);
3811   match(Set dst (LShiftVS dst shift));
3812   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
3813   ins_encode %{
3814     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3815   %}
3816   ins_pipe( pipe_slow );
3817 %}
3818 
3819 instruct vsll4S_imm(vecD dst, immI8 shift) %{
3820   predicate(n->as_Vector()->length() == 4);
3821   match(Set dst (LShiftVS dst shift));
3822   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
3823   ins_encode %{
3824     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3825   %}
3826   ins_pipe( pipe_slow );
3827 %}
3828 
3829 instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
3830   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3831   match(Set dst (LShiftVS src shift));
3832   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
3833   ins_encode %{
3834     bool vector256 = false;
3835     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3836   %}
3837   ins_pipe( pipe_slow );
3838 %}
3839 
3840 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
3841   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3842   match(Set dst (LShiftVS src shift));
3843   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
3844   ins_encode %{
3845     bool vector256 = false;
3846     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3847   %}
3848   ins_pipe( pipe_slow );
3849 %}
3850 
3851 instruct vsll8S(vecX dst, regF shift) %{
3852   predicate(n->as_Vector()->length() == 8);
3853   match(Set dst (LShiftVS dst shift));
3854   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
3855   ins_encode %{
3856     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3857   %}
3858   ins_pipe( pipe_slow );
3859 %}
3860 
3861 instruct vsll8S_imm(vecX dst, immI8 shift) %{
3862   predicate(n->as_Vector()->length() == 8);
3863   match(Set dst (LShiftVS dst shift));
3864   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
3865   ins_encode %{
3866     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3867   %}
3868   ins_pipe( pipe_slow );
3869 %}
3870 
3871 instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
3872   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3873   match(Set dst (LShiftVS src shift));
3874   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
3875   ins_encode %{
3876     bool vector256 = false;
3877     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3878   %}
3879   ins_pipe( pipe_slow );
3880 %}
3881 
3882 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
3883   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3884   match(Set dst (LShiftVS src shift));
3885   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
3886   ins_encode %{
3887     bool vector256 = false;
3888     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3889   %}
3890   ins_pipe( pipe_slow );
3891 %}
3892 
// 16-short (256-bit) forms: AVX2 only, no legacy destructive variant exists.
3893 instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
3894   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3895   match(Set dst (LShiftVS src shift));
3896   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
3897   ins_encode %{
3898     bool vector256 = true;
3899     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3900   %}
3901   ins_pipe( pipe_slow );
3902 %}
3903 
3904 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
3905   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3906   match(Set dst (LShiftVS src shift));
3907   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
3908   ins_encode %{
3909     bool vector256 = true;
3910     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3911   %}
3912   ins_pipe( pipe_slow );
3913 %}
3914 
3915 // Integers vector left shift
// Left shift of packed ints (pslld/vpslld), for 2/4/8 elements. Same four-variant
// scheme as the shorts above: legacy destructive SSE forms (XMM-register or
// immediate shift count) plus AVX non-destructive three-operand forms.
// The 8I (256-bit) forms require UseAVX > 1 (AVX2 integer ops).
3916 instruct vsll2I(vecD dst, regF shift) %{
3917   predicate(n->as_Vector()->length() == 2);
3918   match(Set dst (LShiftVI dst shift));
3919   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
3920   ins_encode %{
3921     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
3922   %}
3923   ins_pipe( pipe_slow );
3924 %}
3925 
3926 instruct vsll2I_imm(vecD dst, immI8 shift) %{
3927   predicate(n->as_Vector()->length() == 2);
3928   match(Set dst (LShiftVI dst shift));
3929   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
3930   ins_encode %{
3931     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
3932   %}
3933   ins_pipe( pipe_slow );
3934 %}
3935 
3936 instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
3937   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3938   match(Set dst (LShiftVI src shift));
3939   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
3940   ins_encode %{
3941     bool vector256 = false;
3942     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3943   %}
3944   ins_pipe( pipe_slow );
3945 %}
3946 
3947 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
3948   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3949   match(Set dst (LShiftVI src shift));
3950   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
3951   ins_encode %{
3952     bool vector256 = false;
3953     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3954   %}
3955   ins_pipe( pipe_slow );
3956 %}
3957 
3958 instruct vsll4I(vecX dst, regF shift) %{
3959   predicate(n->as_Vector()->length() == 4);
3960   match(Set dst (LShiftVI dst shift));
3961   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
3962   ins_encode %{
3963     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
3964   %}
3965   ins_pipe( pipe_slow );
3966 %}
3967 
3968 instruct vsll4I_imm(vecX dst, immI8 shift) %{
3969   predicate(n->as_Vector()->length() == 4);
3970   match(Set dst (LShiftVI dst shift));
3971   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
3972   ins_encode %{
3973     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
3974   %}
3975   ins_pipe( pipe_slow );
3976 %}
3977 
3978 instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
3979   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3980   match(Set dst (LShiftVI src shift));
3981   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
3982   ins_encode %{
3983     bool vector256 = false;
3984     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3985   %}
3986   ins_pipe( pipe_slow );
3987 %}
3988 
3989 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
3990   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3991   match(Set dst (LShiftVI src shift));
3992   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
3993   ins_encode %{
3994     bool vector256 = false;
3995     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3996   %}
3997   ins_pipe( pipe_slow );
3998 %}
3999 
4000 instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
4001   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4002   match(Set dst (LShiftVI src shift));
4003   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4004   ins_encode %{
4005     bool vector256 = true;
4006     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4007   %}
4008   ins_pipe( pipe_slow );
4009 %}
4010 
4011 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4012   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4013   match(Set dst (LShiftVI src shift));
4014   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4015   ins_encode %{
4016     bool vector256 = true;
4017     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4018   %}
4019   ins_pipe( pipe_slow );
4020 %}
4021 
4022 // Longs vector left shift
// Left shift of packed longs (psllq/vpsllq), for 2/4 elements. Legacy SSE forms
// are destructive; AVX forms are three-operand. The 4L (256-bit) forms require
// UseAVX > 1 (AVX2 integer ops).
4023 instruct vsll2L(vecX dst, regF shift) %{
4024   predicate(n->as_Vector()->length() == 2);
4025   match(Set dst (LShiftVL dst shift));
4026   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4027   ins_encode %{
4028     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
4029   %}
4030   ins_pipe( pipe_slow );
4031 %}
4032 
4033 instruct vsll2L_imm(vecX dst, immI8 shift) %{
4034   predicate(n->as_Vector()->length() == 2);
4035   match(Set dst (LShiftVL dst shift));
4036   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4037   ins_encode %{
4038     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
4039   %}
4040   ins_pipe( pipe_slow );
4041 %}
4042 
4043 instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
4044   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4045   match(Set dst (LShiftVL src shift));
4046   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4047   ins_encode %{
4048     bool vector256 = false;
4049     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4050   %}
4051   ins_pipe( pipe_slow );
4052 %}
4053 
4054 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4055   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4056   match(Set dst (LShiftVL src shift));
4057   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4058   ins_encode %{
4059     bool vector256 = false;
4060     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4061   %}
4062   ins_pipe( pipe_slow );
4063 %}
4064 
4065 instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
4066   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4067   match(Set dst (LShiftVL src shift));
4068   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4069   ins_encode %{
4070     bool vector256 = true;
4071     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4072   %}
4073   ins_pipe( pipe_slow );
4074 %}
4075 
4076 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4077   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4078   match(Set dst (LShiftVL src shift));
4079   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4080   ins_encode %{
4081     bool vector256 = true;
4082     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4083   %}
4084   ins_pipe( pipe_slow );
4085 %}
4086 
4087 // ----------------------- LogicalRightShift -----------------------------------
4088 
4089 // Shorts/Chars vector logical right shift
// Logical (zero-filling) right shift of packed shorts/chars (psrlw/vpsrlw).
// Mirrors the left-shift family exactly: per element count (2/4/8/16) there are
// destructive SSE forms (XMM-register or immediate count) and non-destructive
// AVX three-operand forms; 16S (256-bit) forms require UseAVX > 1 (AVX2).
4090 instruct vsrl2S(vecS dst, regF shift) %{
4091   predicate(n->as_Vector()->length() == 2);
4092   match(Set dst (URShiftVS dst shift));
4093   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
4094   ins_encode %{
4095     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4096   %}
4097   ins_pipe( pipe_slow );
4098 %}
4099 
4100 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
4101   predicate(n->as_Vector()->length() == 2);
4102   match(Set dst (URShiftVS dst shift));
4103   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
4104   ins_encode %{
4105     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4106   %}
4107   ins_pipe( pipe_slow );
4108 %}
4109 
4110 instruct vsrl2S_reg(vecS dst, vecS src, regF shift) %{
4111   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4112   match(Set dst (URShiftVS src shift));
4113   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
4114   ins_encode %{
4115     bool vector256 = false;
4116     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4117   %}
4118   ins_pipe( pipe_slow );
4119 %}
4120 
4121 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4122   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4123   match(Set dst (URShiftVS src shift));
4124   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
4125   ins_encode %{
4126     bool vector256 = false;
4127     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4128   %}
4129   ins_pipe( pipe_slow );
4130 %}
4131 
4132 instruct vsrl4S(vecD dst, regF shift) %{
4133   predicate(n->as_Vector()->length() == 4);
4134   match(Set dst (URShiftVS dst shift));
4135   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
4136   ins_encode %{
4137     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4138   %}
4139   ins_pipe( pipe_slow );
4140 %}
4141 
4142 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
4143   predicate(n->as_Vector()->length() == 4);
4144   match(Set dst (URShiftVS dst shift));
4145   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
4146   ins_encode %{
4147     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4148   %}
4149   ins_pipe( pipe_slow );
4150 %}
4151 
4152 instruct vsrl4S_reg(vecD dst, vecD src, regF shift) %{
4153   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4154   match(Set dst (URShiftVS src shift));
4155   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
4156   ins_encode %{
4157     bool vector256 = false;
4158     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4159   %}
4160   ins_pipe( pipe_slow );
4161 %}
4162 
4163 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4164   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4165   match(Set dst (URShiftVS src shift));
4166   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
4167   ins_encode %{
4168     bool vector256 = false;
4169     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4170   %}
4171   ins_pipe( pipe_slow );
4172 %}
4173 
4174 instruct vsrl8S(vecX dst, regF shift) %{
4175   predicate(n->as_Vector()->length() == 8);
4176   match(Set dst (URShiftVS dst shift));
4177   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
4178   ins_encode %{
4179     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4180   %}
4181   ins_pipe( pipe_slow );
4182 %}
4183 
4184 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
4185   predicate(n->as_Vector()->length() == 8);
4186   match(Set dst (URShiftVS dst shift));
4187   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
4188   ins_encode %{
4189     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4190   %}
4191   ins_pipe( pipe_slow );
4192 %}
4193 
4194 instruct vsrl8S_reg(vecX dst, vecX src, regF shift) %{
4195   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4196   match(Set dst (URShiftVS src shift));
4197   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
4198   ins_encode %{
4199     bool vector256 = false;
4200     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4201   %}
4202   ins_pipe( pipe_slow );
4203 %}
4204 
4205 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4206   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4207   match(Set dst (URShiftVS src shift));
4208   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
4209   ins_encode %{
4210     bool vector256 = false;
4211     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4212   %}
4213   ins_pipe( pipe_slow );
4214 %}
4215 
4216 instruct vsrl16S_reg(vecY dst, vecY src, regF shift) %{
4217   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4218   match(Set dst (URShiftVS src shift));
4219   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
4220   ins_encode %{
4221     bool vector256 = true;
4222     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4223   %}
4224   ins_pipe( pipe_slow );
4225 %}
4226 
4227 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4228   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4229   match(Set dst (URShiftVS src shift));
4230   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
4231   ins_encode %{
4232     bool vector256 = true;
4233     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4234   %}
4235   ins_pipe( pipe_slow );
4236 %}
4237 
4238 // Integers vector logical right shift
// Logical right shift of packed ints (psrld/vpsrld), 2/4/8 elements. Same
// variant scheme as above; 8I (256-bit) forms require UseAVX > 1 (AVX2).
4239 instruct vsrl2I(vecD dst, regF shift) %{
4240   predicate(n->as_Vector()->length() == 2);
4241   match(Set dst (URShiftVI dst shift));
4242   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4243   ins_encode %{
4244     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4245   %}
4246   ins_pipe( pipe_slow );
4247 %}
4248 
4249 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
4250   predicate(n->as_Vector()->length() == 2);
4251   match(Set dst (URShiftVI dst shift));
4252   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4253   ins_encode %{
4254     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4255   %}
4256   ins_pipe( pipe_slow );
4257 %}
4258 
4259 instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
4260   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4261   match(Set dst (URShiftVI src shift));
4262   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4263   ins_encode %{
4264     bool vector256 = false;
4265     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4266   %}
4267   ins_pipe( pipe_slow );
4268 %}
4269 
4270 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4271   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4272   match(Set dst (URShiftVI src shift));
4273   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4274   ins_encode %{
4275     bool vector256 = false;
4276     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4277   %}
4278   ins_pipe( pipe_slow );
4279 %}
4280 
4281 instruct vsrl4I(vecX dst, regF shift) %{
4282   predicate(n->as_Vector()->length() == 4);
4283   match(Set dst (URShiftVI dst shift));
4284   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
4285   ins_encode %{
4286     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4287   %}
4288   ins_pipe( pipe_slow );
4289 %}
4290 
4291 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
4292   predicate(n->as_Vector()->length() == 4);
4293   match(Set dst (URShiftVI dst shift));
4294   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
4295   ins_encode %{
4296     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4297   %}
4298   ins_pipe( pipe_slow );
4299 %}
4300 
4301 instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
4302   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4303   match(Set dst (URShiftVI src shift));
4304   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
4305   ins_encode %{
4306     bool vector256 = false;
4307     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4308   %}
4309   ins_pipe( pipe_slow );
4310 %}
4311 
4312 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4313   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4314   match(Set dst (URShiftVI src shift));
4315   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
4316   ins_encode %{
4317     bool vector256 = false;
4318     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4319   %}
4320   ins_pipe( pipe_slow );
4321 %}
4322 
4323 instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
4324   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4325   match(Set dst (URShiftVI src shift));
4326   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
4327   ins_encode %{
4328     bool vector256 = true;
4329     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4330   %}
4331   ins_pipe( pipe_slow );
4332 %}
4333 
4334 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4335   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4336   match(Set dst (URShiftVI src shift));
4337   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
4338   ins_encode %{
4339     bool vector256 = true;
4340     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4341   %}
4342   ins_pipe( pipe_slow );
4343 %}
4344 
4345 // Longs vector logical right shift
// Logical right shift of packed longs (psrlq/vpsrlq), 2/4 elements. Same
// variant scheme; 4L (256-bit) forms require UseAVX > 1 (AVX2).
4346 instruct vsrl2L(vecX dst, regF shift) %{
4347   predicate(n->as_Vector()->length() == 2);
4348   match(Set dst (URShiftVL dst shift));
4349   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
4350   ins_encode %{
4351     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
4352   %}
4353   ins_pipe( pipe_slow );
4354 %}
4355 
4356 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
4357   predicate(n->as_Vector()->length() == 2);
4358   match(Set dst (URShiftVL dst shift));
4359   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
4360   ins_encode %{
4361     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
4362   %}
4363   ins_pipe( pipe_slow );
4364 %}
4365 
4366 instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
4367   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4368   match(Set dst (URShiftVL src shift));
4369   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
4370   ins_encode %{
4371     bool vector256 = false;
4372     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4373   %}
4374   ins_pipe( pipe_slow );
4375 %}
4376 
4377 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4378   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4379   match(Set dst (URShiftVL src shift));
4380   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
4381   ins_encode %{
4382     bool vector256 = false;
4383     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4384   %}
4385   ins_pipe( pipe_slow );
4386 %}
4387 
4388 instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
4389   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4390   match(Set dst (URShiftVL src shift));
4391   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
4392   ins_encode %{
4393     bool vector256 = true;
4394     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4395   %}
4396   ins_pipe( pipe_slow );
4397 %}
4398 
4399 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4400   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4401   match(Set dst (URShiftVL src shift));
4402   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
4403   ins_encode %{
4404     bool vector256 = true;
4405     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4406   %}
4407   ins_pipe( pipe_slow );
4408 %}
4409 
4410 // ------------------- ArithmeticRightShift -----------------------------------
4411 
4412 // Shorts/Chars vector arithmetic right shift
// Arithmetic (sign-extending) right shift of packed shorts/chars (psraw/vpsraw),
// 2/4/8/16 elements. Same four-variant scheme as the other shift families;
// 16S (256-bit) forms require UseAVX > 1 (AVX2). Note no packed-long arithmetic
// right shift appears in these sections — x86 has no psraq in this ISA range,
// so this family covers shorts and ints only.
4413 instruct vsra2S(vecS dst, regF shift) %{
4414   predicate(n->as_Vector()->length() == 2);
4415   match(Set dst (RShiftVS dst shift));
4416   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
4417   ins_encode %{
4418     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4419   %}
4420   ins_pipe( pipe_slow );
4421 %}
4422 
4423 instruct vsra2S_imm(vecS dst, immI8 shift) %{
4424   predicate(n->as_Vector()->length() == 2);
4425   match(Set dst (RShiftVS dst shift));
4426   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
4427   ins_encode %{
4428     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4429   %}
4430   ins_pipe( pipe_slow );
4431 %}
4432 
4433 instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
4434   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4435   match(Set dst (RShiftVS src shift));
4436   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4437   ins_encode %{
4438     bool vector256 = false;
4439     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4440   %}
4441   ins_pipe( pipe_slow );
4442 %}
4443 
4444 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4445   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4446   match(Set dst (RShiftVS src shift));
4447   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4448   ins_encode %{
4449     bool vector256 = false;
4450     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4451   %}
4452   ins_pipe( pipe_slow );
4453 %}
4454 
4455 instruct vsra4S(vecD dst, regF shift) %{
4456   predicate(n->as_Vector()->length() == 4);
4457   match(Set dst (RShiftVS dst shift));
4458   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
4459   ins_encode %{
4460     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4461   %}
4462   ins_pipe( pipe_slow );
4463 %}
4464 
4465 instruct vsra4S_imm(vecD dst, immI8 shift) %{
4466   predicate(n->as_Vector()->length() == 4);
4467   match(Set dst (RShiftVS dst shift));
4468   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
4469   ins_encode %{
4470     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4471   %}
4472   ins_pipe( pipe_slow );
4473 %}
4474 
4475 instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
4476   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4477   match(Set dst (RShiftVS src shift));
4478   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4479   ins_encode %{
4480     bool vector256 = false;
4481     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4482   %}
4483   ins_pipe( pipe_slow );
4484 %}
4485 
4486 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4487   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4488   match(Set dst (RShiftVS src shift));
4489   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4490   ins_encode %{
4491     bool vector256 = false;
4492     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4493   %}
4494   ins_pipe( pipe_slow );
4495 %}
4496 
4497 instruct vsra8S(vecX dst, regF shift) %{
4498   predicate(n->as_Vector()->length() == 8);
4499   match(Set dst (RShiftVS dst shift));
4500   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
4501   ins_encode %{
4502     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4503   %}
4504   ins_pipe( pipe_slow );
4505 %}
4506 
4507 instruct vsra8S_imm(vecX dst, immI8 shift) %{
4508   predicate(n->as_Vector()->length() == 8);
4509   match(Set dst (RShiftVS dst shift));
4510   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
4511   ins_encode %{
4512     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4513   %}
4514   ins_pipe( pipe_slow );
4515 %}
4516 
4517 instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
4518   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4519   match(Set dst (RShiftVS src shift));
4520   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4521   ins_encode %{
4522     bool vector256 = false;
4523     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4524   %}
4525   ins_pipe( pipe_slow );
4526 %}
4527 
4528 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4529   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4530   match(Set dst (RShiftVS src shift));
4531   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4532   ins_encode %{
4533     bool vector256 = false;
4534     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4535   %}
4536   ins_pipe( pipe_slow );
4537 %}
4538 
4539 instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
4540   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4541   match(Set dst (RShiftVS src shift));
4542   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4543   ins_encode %{
4544     bool vector256 = true;
4545     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4546   %}
4547   ins_pipe( pipe_slow );
4548 %}
4549 
4550 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4551   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4552   match(Set dst (RShiftVS src shift));
4553   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4554   ins_encode %{
4555     bool vector256 = true;
4556     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4557   %}
4558   ins_pipe( pipe_slow );
4559 %}
4560 
4561 // Integers vector arithmetic right shift
// Same pattern as the short-element shifts: SSE2 forms are destructive
// ("Set dst (RShiftVI dst shift)"), AVX forms (UseAVX > 0) are
// three-operand, and the 256-bit 8I forms need UseAVX > 1 (AVX2).
// The variable shift count lives in an XMM register ($shift$$XMMRegister).
// 2I x 64-bit payload in a vecD, SSE2: dst >>= shift (count in XMM).
4562 instruct vsra2I(vecD dst, regF shift) %{
4563   predicate(n->as_Vector()->length() == 2);
4564   match(Set dst (RShiftVI dst shift));
4565   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
4566   ins_encode %{
4567     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4568   %}
4569   ins_pipe( pipe_slow );
4570 %}
4571 
// 2I, SSE2: dst >>= imm8.
4572 instruct vsra2I_imm(vecD dst, immI8 shift) %{
4573   predicate(n->as_Vector()->length() == 2);
4574   match(Set dst (RShiftVI dst shift));
4575   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
4576   ins_encode %{
4577     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4578   %}
4579   ins_pipe( pipe_slow );
4580 %}
4581 
// 2I, AVX: dst = src >> shift (count in XMM).
4582 instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
4583   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4584   match(Set dst (RShiftVI src shift));
4585   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4586   ins_encode %{
4587     bool vector256 = false;  // 128-bit VEX encoding
4588     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4589   %}
4590   ins_pipe( pipe_slow );
4591 %}
4592 
// 2I, AVX: dst = src >> imm8.
4593 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4594   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4595   match(Set dst (RShiftVI src shift));
4596   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4597   ins_encode %{
4598     bool vector256 = false;  // 128-bit VEX encoding
4599     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4600   %}
4601   ins_pipe( pipe_slow );
4602 %}
4603 
// 4I x 128-bit, SSE2: dst >>= shift (count in XMM).
4604 instruct vsra4I(vecX dst, regF shift) %{
4605   predicate(n->as_Vector()->length() == 4);
4606   match(Set dst (RShiftVI dst shift));
4607   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
4608   ins_encode %{
4609     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4610   %}
4611   ins_pipe( pipe_slow );
4612 %}
4613 
// 4I x 128-bit, SSE2: dst >>= imm8.
4614 instruct vsra4I_imm(vecX dst, immI8 shift) %{
4615   predicate(n->as_Vector()->length() == 4);
4616   match(Set dst (RShiftVI dst shift));
4617   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
4618   ins_encode %{
4619     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4620   %}
4621   ins_pipe( pipe_slow );
4622 %}
4623 
// 4I x 128-bit, AVX: dst = src >> shift (count in XMM).
4624 instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
4625   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4626   match(Set dst (RShiftVI src shift));
4627   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4628   ins_encode %{
4629     bool vector256 = false;  // 128-bit VEX encoding
4630     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4631   %}
4632   ins_pipe( pipe_slow );
4633 %}
4634 
// 4I x 128-bit, AVX: dst = src >> imm8.
4635 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4636   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4637   match(Set dst (RShiftVI src shift));
4638   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4639   ins_encode %{
4640     bool vector256 = false;  // 128-bit VEX encoding
4641     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4642   %}
4643   ins_pipe( pipe_slow );
4644 %}
4645 
// 8I x 256-bit, AVX2: dst = src >> shift (count in XMM).
4646 instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
4647   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4648   match(Set dst (RShiftVI src shift));
4649   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4650   ins_encode %{
4651     bool vector256 = true;  // 256-bit VEX encoding
4652     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4653   %}
4654   ins_pipe( pipe_slow );
4655 %}
4656 
// 8I x 256-bit, AVX2: dst = src >> imm8.
4657 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4658   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4659   match(Set dst (RShiftVI src shift));
4660   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4661   ins_encode %{
4662     bool vector256 = true;  // 256-bit VEX encoding
4663     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4664   %}
4665   ins_pipe( pipe_slow );
4666 %}
4667 
4668 // There are no longs vector arithmetic right shift instructions.
4669 
4670 
4671 // --------------------------------- AND --------------------------------------
// Bitwise ops are element-type agnostic, so these rules key only on the
// total vector width (length_in_bytes), one rule per width.  SSE2 forms
// are destructive (dst &= src); AVX forms (UseAVX > 0) are three-operand;
// 32-byte forms need AVX2 (UseAVX > 1).  The _mem forms fold a LoadVector
// directly into the AVX instruction's memory source operand.
4672 
// 4 bytes, SSE2 destructive: dst &= src.
4673 instruct vand4B(vecS dst, vecS src) %{
4674   predicate(n->as_Vector()->length_in_bytes() == 4);
4675   match(Set dst (AndV dst src));
4676   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
4677   ins_encode %{
4678     __ pand($dst$$XMMRegister, $src$$XMMRegister);
4679   %}
4680   ins_pipe( pipe_slow );
4681 %}
4682 
// 4 bytes, AVX: dst = src1 & src2.
4683 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
4684   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4685   match(Set dst (AndV src1 src2));
4686   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
4687   ins_encode %{
4688     bool vector256 = false;  // 128-bit VEX encoding
4689     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4690   %}
4691   ins_pipe( pipe_slow );
4692 %}
4693 
// 8 bytes, SSE2 destructive: dst &= src.
4694 instruct vand8B(vecD dst, vecD src) %{
4695   predicate(n->as_Vector()->length_in_bytes() == 8);
4696   match(Set dst (AndV dst src));
4697   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
4698   ins_encode %{
4699     __ pand($dst$$XMMRegister, $src$$XMMRegister);
4700   %}
4701   ins_pipe( pipe_slow );
4702 %}
4703 
// 8 bytes, AVX: dst = src1 & src2.
4704 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
4705   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4706   match(Set dst (AndV src1 src2));
4707   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
4708   ins_encode %{
4709     bool vector256 = false;  // 128-bit VEX encoding
4710     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4711   %}
4712   ins_pipe( pipe_slow );
4713 %}
4714 
// 16 bytes, SSE2 destructive: dst &= src.
4715 instruct vand16B(vecX dst, vecX src) %{
4716   predicate(n->as_Vector()->length_in_bytes() == 16);
4717   match(Set dst (AndV dst src));
4718   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
4719   ins_encode %{
4720     __ pand($dst$$XMMRegister, $src$$XMMRegister);
4721   %}
4722   ins_pipe( pipe_slow );
4723 %}
4724 
// 16 bytes, AVX: dst = src1 & src2.
4725 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
4726   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4727   match(Set dst (AndV src1 src2));
4728   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
4729   ins_encode %{
4730     bool vector256 = false;  // 128-bit VEX encoding
4731     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4732   %}
4733   ins_pipe( pipe_slow );
4734 %}
4735 
// 16 bytes, AVX: dst = src & memory operand (folded vector load).
4736 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
4737   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4738   match(Set dst (AndV src (LoadVector mem)));
4739   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
4740   ins_encode %{
4741     bool vector256 = false;  // 128-bit VEX encoding
4742     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4743   %}
4744   ins_pipe( pipe_slow );
4745 %}
4746 
// 32 bytes, AVX2: dst = src1 & src2.
4747 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
4748   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4749   match(Set dst (AndV src1 src2));
4750   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
4751   ins_encode %{
4752     bool vector256 = true;  // 256-bit VEX encoding
4753     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4754   %}
4755   ins_pipe( pipe_slow );
4756 %}
4757 
// 32 bytes, AVX2: dst = src & memory operand (folded vector load).
4758 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
4759   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4760   match(Set dst (AndV src (LoadVector mem)));
4761   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
4762   ins_encode %{
4763     bool vector256 = true;  // 256-bit VEX encoding
4764     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4765   %}
4766   ins_pipe( pipe_slow );
4767 %}
4768 
4769 // --------------------------------- OR ---------------------------------------
// Same structure as the AND section: rules keyed on length_in_bytes only
// (bitwise OR is element-type agnostic); SSE2 forms destructive, AVX forms
// three-operand, 32-byte forms require AVX2, _mem forms fold a LoadVector.
4770 
// 4 bytes, SSE2 destructive: dst |= src.
4771 instruct vor4B(vecS dst, vecS src) %{
4772   predicate(n->as_Vector()->length_in_bytes() == 4);
4773   match(Set dst (OrV dst src));
4774   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
4775   ins_encode %{
4776     __ por($dst$$XMMRegister, $src$$XMMRegister);
4777   %}
4778   ins_pipe( pipe_slow );
4779 %}
4780 
// 4 bytes, AVX: dst = src1 | src2.
4781 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
4782   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4783   match(Set dst (OrV src1 src2));
4784   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
4785   ins_encode %{
4786     bool vector256 = false;  // 128-bit VEX encoding
4787     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4788   %}
4789   ins_pipe( pipe_slow );
4790 %}
4791 
// 8 bytes, SSE2 destructive: dst |= src.
4792 instruct vor8B(vecD dst, vecD src) %{
4793   predicate(n->as_Vector()->length_in_bytes() == 8);
4794   match(Set dst (OrV dst src));
4795   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
4796   ins_encode %{
4797     __ por($dst$$XMMRegister, $src$$XMMRegister);
4798   %}
4799   ins_pipe( pipe_slow );
4800 %}
4801 
// 8 bytes, AVX: dst = src1 | src2.
4802 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
4803   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4804   match(Set dst (OrV src1 src2));
4805   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
4806   ins_encode %{
4807     bool vector256 = false;  // 128-bit VEX encoding
4808     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4809   %}
4810   ins_pipe( pipe_slow );
4811 %}
4812 
// 16 bytes, SSE2 destructive: dst |= src.
4813 instruct vor16B(vecX dst, vecX src) %{
4814   predicate(n->as_Vector()->length_in_bytes() == 16);
4815   match(Set dst (OrV dst src));
4816   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
4817   ins_encode %{
4818     __ por($dst$$XMMRegister, $src$$XMMRegister);
4819   %}
4820   ins_pipe( pipe_slow );
4821 %}
4822 
// 16 bytes, AVX: dst = src1 | src2.
4823 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
4824   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4825   match(Set dst (OrV src1 src2));
4826   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
4827   ins_encode %{
4828     bool vector256 = false;  // 128-bit VEX encoding
4829     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4830   %}
4831   ins_pipe( pipe_slow );
4832 %}
4833 
// 16 bytes, AVX: dst = src | memory operand (folded vector load).
4834 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
4835   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4836   match(Set dst (OrV src (LoadVector mem)));
4837   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
4838   ins_encode %{
4839     bool vector256 = false;  // 128-bit VEX encoding
4840     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4841   %}
4842   ins_pipe( pipe_slow );
4843 %}
4844 
// 32 bytes, AVX2: dst = src1 | src2.
4845 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
4846   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4847   match(Set dst (OrV src1 src2));
4848   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
4849   ins_encode %{
4850     bool vector256 = true;  // 256-bit VEX encoding
4851     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4852   %}
4853   ins_pipe( pipe_slow );
4854 %}
4855 
// 32 bytes, AVX2: dst = src | memory operand (folded vector load).
4856 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
4857   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4858   match(Set dst (OrV src (LoadVector mem)));
4859   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
4860   ins_encode %{
4861     bool vector256 = true;  // 256-bit VEX encoding
4862     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4863   %}
4864   ins_pipe( pipe_slow );
4865 %}
4866 
4867 // --------------------------------- XOR --------------------------------------
// Same structure as the AND/OR sections: rules keyed on length_in_bytes
// only (bitwise XOR is element-type agnostic); SSE2 forms destructive,
// AVX forms three-operand, 32-byte forms require AVX2, _mem forms fold a
// LoadVector into the instruction's memory source operand.
4868 
// 4 bytes, SSE2 destructive: dst ^= src.
4869 instruct vxor4B(vecS dst, vecS src) %{
4870   predicate(n->as_Vector()->length_in_bytes() == 4);
4871   match(Set dst (XorV dst src));
4872   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
4873   ins_encode %{
4874     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4875   %}
4876   ins_pipe( pipe_slow );
4877 %}
4878 
// 4 bytes, AVX: dst = src1 ^ src2.
4879 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
4880   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4881   match(Set dst (XorV src1 src2));
4882   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
4883   ins_encode %{
4884     bool vector256 = false;  // 128-bit VEX encoding
4885     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4886   %}
4887   ins_pipe( pipe_slow );
4888 %}
4889 
// 8 bytes, SSE2 destructive: dst ^= src.
4890 instruct vxor8B(vecD dst, vecD src) %{
4891   predicate(n->as_Vector()->length_in_bytes() == 8);
4892   match(Set dst (XorV dst src));
4893   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
4894   ins_encode %{
4895     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4896   %}
4897   ins_pipe( pipe_slow );
4898 %}
4899 
// 8 bytes, AVX: dst = src1 ^ src2.
4900 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
4901   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4902   match(Set dst (XorV src1 src2));
4903   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
4904   ins_encode %{
4905     bool vector256 = false;  // 128-bit VEX encoding
4906     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4907   %}
4908   ins_pipe( pipe_slow );
4909 %}
4910 
// 16 bytes, SSE2 destructive: dst ^= src.
4911 instruct vxor16B(vecX dst, vecX src) %{
4912   predicate(n->as_Vector()->length_in_bytes() == 16);
4913   match(Set dst (XorV dst src));
4914   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
4915   ins_encode %{
4916     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4917   %}
4918   ins_pipe( pipe_slow );
4919 %}
4920 
// 16 bytes, AVX: dst = src1 ^ src2.
4921 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
4922   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4923   match(Set dst (XorV src1 src2));
4924   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
4925   ins_encode %{
4926     bool vector256 = false;  // 128-bit VEX encoding
4927     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4928   %}
4929   ins_pipe( pipe_slow );
4930 %}
4931 
// 16 bytes, AVX: dst = src ^ memory operand (folded vector load).
4932 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
4933   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4934   match(Set dst (XorV src (LoadVector mem)));
4935   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
4936   ins_encode %{
4937     bool vector256 = false;  // 128-bit VEX encoding
4938     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4939   %}
4940   ins_pipe( pipe_slow );
4941 %}
4942 
// 32 bytes, AVX2: dst = src1 ^ src2.
4943 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
4944   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4945   match(Set dst (XorV src1 src2));
4946   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
4947   ins_encode %{
4948     bool vector256 = true;  // 256-bit VEX encoding
4949     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4950   %}
4951   ins_pipe( pipe_slow );
4952 %}
4953 
// 32 bytes, AVX2: dst = src ^ memory operand (folded vector load).
4954 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
4955   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4956   match(Set dst (XorV src (LoadVector mem)));
4957   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
4958   ins_encode %{
4959     bool vector256 = true;  // 256-bit VEX encoding
4960     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4961   %}
4962   ins_pipe( pipe_slow );
4963 %}
4964 
src/cpu/x86/vm/x86.ad
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File