483 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 484 #else 485 static address float_signmask() { return (address)float_signmask_pool; } 486 static address float_signflip() { return (address)float_signflip_pool; } 487 static address double_signmask() { return (address)double_signmask_pool; } 488 static address double_signflip() { return (address)double_signflip_pool; } 489 #endif 490 491 // Map Types to machine register types 492 const int Matcher::base2reg[Type::lastype] = { 493 Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, 494 Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ 495 Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */ 496 Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ 497 0, 0/*abio*/, 498 Op_RegP /* Return address */, 0, /* the memories */ 499 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, 500 0 /*bottom*/ 501 }; 502 503 // Max vector size in bytes. 0 if not supported. 504 const int Matcher::vector_width_in_bytes(BasicType bt) { 505 assert(is_java_primitive(bt), "only primitive type vectors"); 506 if (UseSSE < 2) return 0; 507 // SSE2 supports 128bit vectors for all types. 508 // AVX2 supports 256bit vectors for all types. 509 int size = (UseAVX > 1) ? 32 : 16; 510 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 511 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 512 size = 32; 513 // Use flag to limit vector size. 514 size = MIN2(size,(int)MaxVectorSize); 515 // Minimum 2 values in vector (or 4 for bytes). 
516 switch (bt) {
517 case T_DOUBLE:
518 case T_LONG:
519 if (size < 16) return 0;
520 case T_FLOAT:
521 case T_INT:
522 if (size < 8) return 0;
1422 ins_pipe(pipe_slow);
1423 %}
1424
1425 instruct absF_reg(regF dst) %{
1426 predicate((UseSSE>=1) && (UseAVX == 0));
1427 match(Set dst (AbsF dst));
1428 ins_cost(150);
1429 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
1430 ins_encode %{
1431 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1432 %}
1433 ins_pipe(pipe_slow);
1434 %}
1435
1436 instruct absF_reg_reg(regF dst, regF src) %{
1437 predicate(UseAVX > 0);
1438 match(Set dst (AbsF src));
1439 ins_cost(150);
1440 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1441 ins_encode %{
    /* FIX: the AVX three-operand assembler calls take a trailing vector256
       flag (see the vpaddb/vaddps encodings elsewhere in this file); scalar
       float ops use the 128-bit form, so pass false. */
    bool vector256 = false;
1442 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1443 ExternalAddress(float_signmask()), vector256);
1444 %}
1445 ins_pipe(pipe_slow);
1446 %}
1447
1448 instruct absD_reg(regD dst) %{
1449 predicate((UseSSE>=2) && (UseAVX == 0));
1450 match(Set dst (AbsD dst));
1451 ins_cost(150);
1452 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
1453 "# abs double by sign masking" %}
1454 ins_encode %{
1455 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1456 %}
1457 ins_pipe(pipe_slow);
1458 %}
1459
1460 instruct absD_reg_reg(regD dst, regD src) %{
1461 predicate(UseAVX > 0);
1462 match(Set dst (AbsD src));
1463 ins_cost(150);
1464 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
1465 "# abs double by sign masking" %}
1466 ins_encode %{
    bool vector256 = false; /* scalar double: 128-bit encoding */
1467 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1468 ExternalAddress(double_signmask()), vector256);
1469 %}
1470 ins_pipe(pipe_slow);
1471 %}
1472
1473 instruct negF_reg(regF dst) %{
1474 predicate((UseSSE>=1) && (UseAVX == 0));
1475 match(Set dst (NegF dst));
1476 ins_cost(150);
1477 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
1478 ins_encode %{
1479 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
1480 %}
1481 ins_pipe(pipe_slow);
1482 %}
1483
1484 instruct negF_reg_reg(regF dst, regF src) %{
1485 predicate(UseAVX > 0);
1486 match(Set dst (NegF src));
1487 ins_cost(150);
1488 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
1489 ins_encode %{
    bool vector256 = false; /* scalar float: 128-bit encoding */
1490 __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
1491 ExternalAddress(float_signflip()), vector256);
1492 %}
1493 ins_pipe(pipe_slow);
1494 %}
1495
1496 instruct negD_reg(regD dst) %{
1497 predicate((UseSSE>=2) && (UseAVX == 0));
1498 match(Set dst (NegD dst));
1499 ins_cost(150);
1500 format %{ "xorpd $dst, [0x8000000000000000]\t"
1501 "# neg double by sign flipping" %}
1502 ins_encode %{
1503 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
1504 %}
1505 ins_pipe(pipe_slow);
1506 %}
1507
1508 instruct negD_reg_reg(regD dst, regD src) %{
1509 predicate(UseAVX > 0);
1510 match(Set dst (NegD src));
1511 ins_cost(150);
1512 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
1513 "# neg double by sign flipping" %}
1514 ins_encode %{
    bool vector256 = false; /* scalar double: 128-bit encoding */
1515 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
1516 ExternalAddress(double_signflip()), vector256);
1517 %}
1518 ins_pipe(pipe_slow);
1519 %}
1520
1521 instruct sqrtF_reg(regF dst, regF src) %{
1522 predicate(UseSSE>=1);
1523 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
1524
1525 format %{ "sqrtss $dst, $src" %}
1526 ins_cost(150);
1527 ins_encode %{
1528 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
1529 %}
1530 ins_pipe(pipe_slow);
1531 %}
1532
1533 instruct sqrtF_mem(regF dst, memory src) %{
1534 predicate(UseSSE>=1);
1535 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1536
2365 predicate(n->as_Vector()->length() == 2);
2366 match(Set dst (ReplicateD zero));
2367 format %{ "xorpd $dst,$dst\t!
replicate2D zero" %}
2368 ins_encode %{
2369 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2370 %}
2371 ins_pipe( fpu_reg_reg );
2372 %}
2373
2374 instruct Repl4D_zero(vecY dst, immD0 zero) %{
2375 predicate(n->as_Vector()->length() == 4);
2376 match(Set dst (ReplicateD zero));
2377 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
2378 ins_encode %{
2379 bool vector256 = true;
2380 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2381 %}
2382 ins_pipe( fpu_reg_reg );
2383 %}
2384
| 483 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
484 #else
485 static address float_signmask() { return (address)float_signmask_pool; }
486 static address float_signflip() { return (address)float_signflip_pool; }
487 static address double_signmask() { return (address)double_signmask_pool; }
488 static address double_signflip() { return (address)double_signflip_pool; }
489 #endif
490
491 // Map Types to machine register types
492 const int Matcher::base2reg[Type::lastype] = {
493 Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
494 Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
495 Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
496 Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
497 0, 0/*abio*/,
498 Op_RegP /* Return address */, 0, /* the memories */
499 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
500 0 /*bottom*/
501 };
502
503 const bool Matcher::match_rule_supported(int opcode) {
504 if (!has_match_rule(opcode))
505 return false;
506
507 switch (opcode) {
508 case Op_PopCountI:
509 case Op_PopCountL:
510 if (!UsePopCountInstruction)
511 return false;
    /* FIX: without this break, a supported PopCountI/L falls through into the
       MulVI arm below and is wrongly rejected whenever UseSSE < 4 && UseAVX < 1. */
    break;
512 case Op_MulVI:
513 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
514 return false;
515 break;
516 }
517
518 return true; // Per default match rules are supported.
519 }
520
521 // Max vector size in bytes. 0 if not supported.
522 const int Matcher::vector_width_in_bytes(BasicType bt) { 523 assert(is_java_primitive(bt), "only primitive type vectors"); 524 if (UseSSE < 2) return 0; 525 // SSE2 supports 128bit vectors for all types. 526 // AVX2 supports 256bit vectors for all types. 527 int size = (UseAVX > 1) ? 32 : 16; 528 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 529 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 530 size = 32; 531 // Use flag to limit vector size. 532 size = MIN2(size,(int)MaxVectorSize); 533 // Minimum 2 values in vector (or 4 for bytes). 534 switch (bt) { 535 case T_DOUBLE: 536 case T_LONG: 537 if (size < 16) return 0; 538 case T_FLOAT: 539 case T_INT: 540 if (size < 8) return 0; 1440 ins_pipe(pipe_slow); 1441 %} 1442 1443 instruct absF_reg(regF dst) %{ 1444 predicate((UseSSE>=1) && (UseAVX == 0)); 1445 match(Set dst (AbsF dst)); 1446 ins_cost(150); 1447 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1448 ins_encode %{ 1449 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1450 %} 1451 ins_pipe(pipe_slow); 1452 %} 1453 1454 instruct absF_reg_reg(regF dst, regF src) %{ 1455 predicate(UseAVX > 0); 1456 match(Set dst (AbsF src)); 1457 ins_cost(150); 1458 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 1459 ins_encode %{ 1460 bool vector256 = false; 1461 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1462 ExternalAddress(float_signmask()), vector256); 1463 %} 1464 ins_pipe(pipe_slow); 1465 %} 1466 1467 instruct absD_reg(regD dst) %{ 1468 predicate((UseSSE>=2) && (UseAVX == 0)); 1469 match(Set dst (AbsD dst)); 1470 ins_cost(150); 1471 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1472 "# abs double by sign masking" %} 1473 ins_encode %{ 1474 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1475 %} 1476 ins_pipe(pipe_slow); 1477 %} 1478 1479 instruct absD_reg_reg(regD dst, regD src) %{ 1480 predicate(UseAVX > 0); 1481 match(Set dst (AbsD src)); 1482 ins_cost(150); 1483 
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1484 "# abs double by sign masking" %} 1485 ins_encode %{ 1486 bool vector256 = false; 1487 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1488 ExternalAddress(double_signmask()), vector256); 1489 %} 1490 ins_pipe(pipe_slow); 1491 %} 1492 1493 instruct negF_reg(regF dst) %{ 1494 predicate((UseSSE>=1) && (UseAVX == 0)); 1495 match(Set dst (NegF dst)); 1496 ins_cost(150); 1497 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1498 ins_encode %{ 1499 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1500 %} 1501 ins_pipe(pipe_slow); 1502 %} 1503 1504 instruct negF_reg_reg(regF dst, regF src) %{ 1505 predicate(UseAVX > 0); 1506 match(Set dst (NegF src)); 1507 ins_cost(150); 1508 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1509 ins_encode %{ 1510 bool vector256 = false; 1511 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1512 ExternalAddress(float_signflip()), vector256); 1513 %} 1514 ins_pipe(pipe_slow); 1515 %} 1516 1517 instruct negD_reg(regD dst) %{ 1518 predicate((UseSSE>=2) && (UseAVX == 0)); 1519 match(Set dst (NegD dst)); 1520 ins_cost(150); 1521 format %{ "xorpd $dst, [0x8000000000000000]\t" 1522 "# neg double by sign flipping" %} 1523 ins_encode %{ 1524 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1525 %} 1526 ins_pipe(pipe_slow); 1527 %} 1528 1529 instruct negD_reg_reg(regD dst, regD src) %{ 1530 predicate(UseAVX > 0); 1531 match(Set dst (NegD src)); 1532 ins_cost(150); 1533 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1534 "# neg double by sign flipping" %} 1535 ins_encode %{ 1536 bool vector256 = false; 1537 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1538 ExternalAddress(double_signflip()), vector256); 1539 %} 1540 ins_pipe(pipe_slow); 1541 %} 1542 1543 instruct sqrtF_reg(regF dst, regF src) %{ 1544 predicate(UseSSE>=1); 1545 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1546 1547 format %{ "sqrtss 
$dst, $src" %} 1548 ins_cost(150); 1549 ins_encode %{ 1550 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1551 %} 1552 ins_pipe(pipe_slow); 1553 %} 1554 1555 instruct sqrtF_mem(regF dst, memory src) %{ 1556 predicate(UseSSE>=1); 1557 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 1558 2387 predicate(n->as_Vector()->length() == 2); 2388 match(Set dst (ReplicateD zero)); 2389 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 2390 ins_encode %{ 2391 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 2392 %} 2393 ins_pipe( fpu_reg_reg ); 2394 %} 2395 2396 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 2397 predicate(n->as_Vector()->length() == 4); 2398 match(Set dst (ReplicateD zero)); 2399 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 2400 ins_encode %{ 2401 bool vector256 = true; 2402 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2403 %} 2404 ins_pipe( fpu_reg_reg ); 2405 %} 2406 2407 // ====================VECTOR ARITHMETIC======================================= 2408 2409 // --------------------------------- ADD -------------------------------------- 2410 2411 // Bytes vector add 2412 instruct vadd4B(vecS dst, vecS src) %{ 2413 predicate(n->as_Vector()->length() == 4); 2414 match(Set dst (AddVB dst src)); 2415 format %{ "paddb $dst,$src\t! add packed4B" %} 2416 ins_encode %{ 2417 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2418 %} 2419 ins_pipe( pipe_slow ); 2420 %} 2421 2422 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 2423 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2424 match(Set dst (AddVB src1 src2)); 2425 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed4B" %} 2426 ins_encode %{ 2427 bool vector256 = false; 2428 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2429 %} 2430 ins_pipe( pipe_slow ); 2431 %} 2432 2433 instruct vadd8B(vecD dst, vecD src) %{ 2434 predicate(n->as_Vector()->length() == 8); 2435 match(Set dst (AddVB dst src)); 2436 format %{ "paddb $dst,$src\t! add packed8B" %} 2437 ins_encode %{ 2438 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2439 %} 2440 ins_pipe( pipe_slow ); 2441 %} 2442 2443 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 2444 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2445 match(Set dst (AddVB src1 src2)); 2446 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 2447 ins_encode %{ 2448 bool vector256 = false; 2449 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2450 %} 2451 ins_pipe( pipe_slow ); 2452 %} 2453 2454 instruct vadd16B(vecX dst, vecX src) %{ 2455 predicate(n->as_Vector()->length() == 16); 2456 match(Set dst (AddVB dst src)); 2457 format %{ "paddb $dst,$src\t! add packed16B" %} 2458 ins_encode %{ 2459 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2460 %} 2461 ins_pipe( pipe_slow ); 2462 %} 2463 2464 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 2465 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2466 match(Set dst (AddVB src1 src2)); 2467 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 2468 ins_encode %{ 2469 bool vector256 = false; 2470 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2471 %} 2472 ins_pipe( pipe_slow ); 2473 %} 2474 2475 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 2476 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2477 match(Set dst (AddVB src (LoadVector mem))); 2478 format %{ "vpaddb $dst,$src,$mem\t! 
add packed16B" %} 2479 ins_encode %{ 2480 bool vector256 = false; 2481 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2482 %} 2483 ins_pipe( pipe_slow ); 2484 %} 2485 2486 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 2487 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2488 match(Set dst (AddVB src1 src2)); 2489 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 2490 ins_encode %{ 2491 bool vector256 = true; 2492 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2493 %} 2494 ins_pipe( pipe_slow ); 2495 %} 2496 2497 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 2498 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2499 match(Set dst (AddVB src (LoadVector mem))); 2500 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 2501 ins_encode %{ 2502 bool vector256 = true; 2503 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2504 %} 2505 ins_pipe( pipe_slow ); 2506 %} 2507 2508 // Shorts/Chars vector add 2509 instruct vadd2S(vecS dst, vecS src) %{ 2510 predicate(n->as_Vector()->length() == 2); 2511 match(Set dst (AddVS dst src)); 2512 format %{ "paddw $dst,$src\t! add packed2S" %} 2513 ins_encode %{ 2514 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2515 %} 2516 ins_pipe( pipe_slow ); 2517 %} 2518 2519 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 2520 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2521 match(Set dst (AddVS src1 src2)); 2522 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 2523 ins_encode %{ 2524 bool vector256 = false; 2525 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2526 %} 2527 ins_pipe( pipe_slow ); 2528 %} 2529 2530 instruct vadd4S(vecD dst, vecD src) %{ 2531 predicate(n->as_Vector()->length() == 4); 2532 match(Set dst (AddVS dst src)); 2533 format %{ "paddw $dst,$src\t! 
add packed4S" %} 2534 ins_encode %{ 2535 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2536 %} 2537 ins_pipe( pipe_slow ); 2538 %} 2539 2540 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 2541 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2542 match(Set dst (AddVS src1 src2)); 2543 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 2544 ins_encode %{ 2545 bool vector256 = false; 2546 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2547 %} 2548 ins_pipe( pipe_slow ); 2549 %} 2550 2551 instruct vadd8S(vecX dst, vecX src) %{ 2552 predicate(n->as_Vector()->length() == 8); 2553 match(Set dst (AddVS dst src)); 2554 format %{ "paddw $dst,$src\t! add packed8S" %} 2555 ins_encode %{ 2556 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2557 %} 2558 ins_pipe( pipe_slow ); 2559 %} 2560 2561 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 2562 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2563 match(Set dst (AddVS src1 src2)); 2564 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 2565 ins_encode %{ 2566 bool vector256 = false; 2567 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2568 %} 2569 ins_pipe( pipe_slow ); 2570 %} 2571 2572 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 2573 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2574 match(Set dst (AddVS src (LoadVector mem))); 2575 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 2576 ins_encode %{ 2577 bool vector256 = false; 2578 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2579 %} 2580 ins_pipe( pipe_slow ); 2581 %} 2582 2583 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 2584 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 2585 match(Set dst (AddVS src1 src2)); 2586 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed16S" %} 2587 ins_encode %{ 2588 bool vector256 = true; 2589 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2590 %} 2591 ins_pipe( pipe_slow ); 2592 %} 2593 2594 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 2595 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 2596 match(Set dst (AddVS src (LoadVector mem))); 2597 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 2598 ins_encode %{ 2599 bool vector256 = true; 2600 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2601 %} 2602 ins_pipe( pipe_slow ); 2603 %} 2604 2605 // Integers vector add 2606 instruct vadd2I(vecD dst, vecD src) %{ 2607 predicate(n->as_Vector()->length() == 2); 2608 match(Set dst (AddVI dst src)); 2609 format %{ "paddd $dst,$src\t! add packed2I" %} 2610 ins_encode %{ 2611 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 2612 %} 2613 ins_pipe( pipe_slow ); 2614 %} 2615 2616 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 2617 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2618 match(Set dst (AddVI src1 src2)); 2619 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 2620 ins_encode %{ 2621 bool vector256 = false; 2622 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2623 %} 2624 ins_pipe( pipe_slow ); 2625 %} 2626 2627 instruct vadd4I(vecX dst, vecX src) %{ 2628 predicate(n->as_Vector()->length() == 4); 2629 match(Set dst (AddVI dst src)); 2630 format %{ "paddd $dst,$src\t! add packed4I" %} 2631 ins_encode %{ 2632 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 2633 %} 2634 ins_pipe( pipe_slow ); 2635 %} 2636 2637 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 2638 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2639 match(Set dst (AddVI src1 src2)); 2640 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed4I" %} 2641 ins_encode %{ 2642 bool vector256 = false; 2643 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2644 %} 2645 ins_pipe( pipe_slow ); 2646 %} 2647 2648 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 2649 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2650 match(Set dst (AddVI src (LoadVector mem))); 2651 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 2652 ins_encode %{ 2653 bool vector256 = false; 2654 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2655 %} 2656 ins_pipe( pipe_slow ); 2657 %} 2658 2659 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 2660 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 2661 match(Set dst (AddVI src1 src2)); 2662 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 2663 ins_encode %{ 2664 bool vector256 = true; 2665 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2666 %} 2667 ins_pipe( pipe_slow ); 2668 %} 2669 2670 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 2671 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 2672 match(Set dst (AddVI src (LoadVector mem))); 2673 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 2674 ins_encode %{ 2675 bool vector256 = true; 2676 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2677 %} 2678 ins_pipe( pipe_slow ); 2679 %} 2680 2681 // Longs vector add 2682 instruct vadd2L(vecX dst, vecX src) %{ 2683 predicate(n->as_Vector()->length() == 2); 2684 match(Set dst (AddVL dst src)); 2685 format %{ "paddq $dst,$src\t! add packed2L" %} 2686 ins_encode %{ 2687 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 2688 %} 2689 ins_pipe( pipe_slow ); 2690 %} 2691 2692 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 2693 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2694 match(Set dst (AddVL src1 src2)); 2695 format %{ "vpaddq $dst,$src1,$src2\t! 
add packed2L" %} 2696 ins_encode %{ 2697 bool vector256 = false; 2698 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2699 %} 2700 ins_pipe( pipe_slow ); 2701 %} 2702 2703 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 2704 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2705 match(Set dst (AddVL src (LoadVector mem))); 2706 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 2707 ins_encode %{ 2708 bool vector256 = false; 2709 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2710 %} 2711 ins_pipe( pipe_slow ); 2712 %} 2713 2714 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 2715 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 2716 match(Set dst (AddVL src1 src2)); 2717 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 2718 ins_encode %{ 2719 bool vector256 = true; 2720 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2721 %} 2722 ins_pipe( pipe_slow ); 2723 %} 2724 2725 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 2726 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 2727 match(Set dst (AddVL src (LoadVector mem))); 2728 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 2729 ins_encode %{ 2730 bool vector256 = true; 2731 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2732 %} 2733 ins_pipe( pipe_slow ); 2734 %} 2735 2736 // Floats vector add 2737 instruct vadd2F(vecD dst, vecD src) %{ 2738 predicate(n->as_Vector()->length() == 2); 2739 match(Set dst (AddVF dst src)); 2740 format %{ "addps $dst,$src\t! add packed2F" %} 2741 ins_encode %{ 2742 __ addps($dst$$XMMRegister, $src$$XMMRegister); 2743 %} 2744 ins_pipe( pipe_slow ); 2745 %} 2746 2747 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 2748 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2749 match(Set dst (AddVF src1 src2)); 2750 format %{ "vaddps $dst,$src1,$src2\t! 
add packed2F" %} 2751 ins_encode %{ 2752 bool vector256 = false; 2753 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2754 %} 2755 ins_pipe( pipe_slow ); 2756 %} 2757 2758 instruct vadd4F(vecX dst, vecX src) %{ 2759 predicate(n->as_Vector()->length() == 4); 2760 match(Set dst (AddVF dst src)); 2761 format %{ "addps $dst,$src\t! add packed4F" %} 2762 ins_encode %{ 2763 __ addps($dst$$XMMRegister, $src$$XMMRegister); 2764 %} 2765 ins_pipe( pipe_slow ); 2766 %} 2767 2768 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 2769 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2770 match(Set dst (AddVF src1 src2)); 2771 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 2772 ins_encode %{ 2773 bool vector256 = false; 2774 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2775 %} 2776 ins_pipe( pipe_slow ); 2777 %} 2778 2779 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 2780 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2781 match(Set dst (AddVF src (LoadVector mem))); 2782 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 2783 ins_encode %{ 2784 bool vector256 = false; 2785 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2786 %} 2787 ins_pipe( pipe_slow ); 2788 %} 2789 2790 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 2791 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2792 match(Set dst (AddVF src1 src2)); 2793 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 2794 ins_encode %{ 2795 bool vector256 = true; 2796 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2797 %} 2798 ins_pipe( pipe_slow ); 2799 %} 2800 2801 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 2802 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2803 match(Set dst (AddVF src (LoadVector mem))); 2804 format %{ "vaddps $dst,$src,$mem\t! 
add packed8F" %} 2805 ins_encode %{ 2806 bool vector256 = true; 2807 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2808 %} 2809 ins_pipe( pipe_slow ); 2810 %} 2811 2812 // Doubles vector add 2813 instruct vadd2D(vecX dst, vecX src) %{ 2814 predicate(n->as_Vector()->length() == 2); 2815 match(Set dst (AddVD dst src)); 2816 format %{ "addpd $dst,$src\t! add packed2D" %} 2817 ins_encode %{ 2818 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 2819 %} 2820 ins_pipe( pipe_slow ); 2821 %} 2822 2823 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 2824 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2825 match(Set dst (AddVD src1 src2)); 2826 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 2827 ins_encode %{ 2828 bool vector256 = false; 2829 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2830 %} 2831 ins_pipe( pipe_slow ); 2832 %} 2833 2834 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 2835 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2836 match(Set dst (AddVD src (LoadVector mem))); 2837 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 2838 ins_encode %{ 2839 bool vector256 = false; 2840 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2841 %} 2842 ins_pipe( pipe_slow ); 2843 %} 2844 2845 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 2846 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2847 match(Set dst (AddVD src1 src2)); 2848 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 2849 ins_encode %{ 2850 bool vector256 = true; 2851 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2852 %} 2853 ins_pipe( pipe_slow ); 2854 %} 2855 2856 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 2857 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2858 match(Set dst (AddVD src (LoadVector mem))); 2859 format %{ "vaddpd $dst,$src,$mem\t! 
add packed4D" %} 2860 ins_encode %{ 2861 bool vector256 = true; 2862 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2863 %} 2864 ins_pipe( pipe_slow ); 2865 %} 2866 2867 // --------------------------------- SUB -------------------------------------- 2868 2869 // Bytes vector sub 2870 instruct vsub4B(vecS dst, vecS src) %{ 2871 predicate(n->as_Vector()->length() == 4); 2872 match(Set dst (SubVB dst src)); 2873 format %{ "psubb $dst,$src\t! sub packed4B" %} 2874 ins_encode %{ 2875 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 2876 %} 2877 ins_pipe( pipe_slow ); 2878 %} 2879 2880 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 2881 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2882 match(Set dst (SubVB src1 src2)); 2883 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 2884 ins_encode %{ 2885 bool vector256 = false; 2886 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2887 %} 2888 ins_pipe( pipe_slow ); 2889 %} 2890 2891 instruct vsub8B(vecD dst, vecD src) %{ 2892 predicate(n->as_Vector()->length() == 8); 2893 match(Set dst (SubVB dst src)); 2894 format %{ "psubb $dst,$src\t! sub packed8B" %} 2895 ins_encode %{ 2896 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 2897 %} 2898 ins_pipe( pipe_slow ); 2899 %} 2900 2901 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 2902 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2903 match(Set dst (SubVB src1 src2)); 2904 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 2905 ins_encode %{ 2906 bool vector256 = false; 2907 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2908 %} 2909 ins_pipe( pipe_slow ); 2910 %} 2911 2912 instruct vsub16B(vecX dst, vecX src) %{ 2913 predicate(n->as_Vector()->length() == 16); 2914 match(Set dst (SubVB dst src)); 2915 format %{ "psubb $dst,$src\t! 
sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand forms below: vector256 == false selects the 128-bit (XMM)
// VEX encoding, vector256 == true selects the 256-bit (YMM) encoding.
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit integer ops require AVX2 (UseAVX > 1).
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t!
sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit FP ops need only AVX1 (UseAVX > 0), unlike the integer forms.
instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
// pmulld is an SSE4.1 instruction, hence UseSSE > 3 on the non-AVX forms.
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t!
mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
// The variable shift count lives in an XMM register (operand class regF);
// the *_imm forms take an 8-bit immediate count instead.
instruct vsll2S(vecS dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t!
left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t!
left shift packed2L" %} 4058 ins_encode %{ 4059 bool vector256 = false; 4060 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4061 %} 4062 ins_pipe( pipe_slow ); 4063 %} 4064 4065 instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{ 4066 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4067 match(Set dst (LShiftVL src shift)); 4068 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4069 ins_encode %{ 4070 bool vector256 = true; 4071 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4072 %} 4073 ins_pipe( pipe_slow ); 4074 %} 4075 4076 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4077 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4078 match(Set dst (LShiftVL src shift)); 4079 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4080 ins_encode %{ 4081 bool vector256 = true; 4082 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4083 %} 4084 ins_pipe( pipe_slow ); 4085 %} 4086 4087 // ----------------------- LogicalRightShift ----------------------------------- 4088 4089 // Shorts/Chars vector logical right shift produces incorrect Java result 4090 // for negative data because java code convert short value into int with 4091 // sign extension before a shift. 4092 4093 // Integers vector logical right shift 4094 instruct vsrl2I(vecD dst, regF shift) %{ 4095 predicate(n->as_Vector()->length() == 2); 4096 match(Set dst (URShiftVI dst shift)); 4097 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4098 ins_encode %{ 4099 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4100 %} 4101 ins_pipe( pipe_slow ); 4102 %} 4103 4104 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 4105 predicate(n->as_Vector()->length() == 2); 4106 match(Set dst (URShiftVI dst shift)); 4107 format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Logical (unsigned) right shift rules for ints and longs. Variant naming:
// plain/_imm are SSE2 in-place forms (count in XMM register / immediate);
// _reg/_reg_imm are AVX three-operand forms; vecY forms need AVX2
// (UseAVX > 1) and pass vector256 = true.

instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Arithmetic (sign-preserving) right shift rules for shorts and ints.
// Variant naming: plain/_imm are SSE2 in-place forms (count in XMM register
// / immediate); _reg/_reg_imm are AVX three-operand forms; vecY forms need
// AVX2 (UseAVX > 1) and pass vector256 = true.

instruct vsra4S(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.


// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! 
and vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Bitwise AND/OR/XOR rules, keyed by vector length in bytes (element type
// is irrelevant for bitwise ops). Variants: plain = SSE2 in-place form,
// _reg = AVX three-operand form, _mem = AVX form with the second operand
// folded from memory (LoadVector). 32-byte (vecY) forms need AVX2
// (UseAVX > 1) and pass vector256 = true.

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}