  // --- tail of a switch over ideal opcodes; the enclosing function
  // (presumably Matcher::match_rule_supported -- its opening is above this
  // chunk and not visible here) begins earlier in the file.
  case Op_SqrtVD:
    if (UseAVX < 1) // enabled for AVX only
      ret_value = false;
    break;
  case Op_CompareAndSwapL:
#ifdef _LP64
  case Op_CompareAndSwapP:
#endif
    // 8-byte compare-and-swap needs hardware cx8 (cmpxchg8b/16b) support.
    if (!VM_Version::supports_cx8())
      ret_value = false;
    break;
  case Op_CMoveVD:
    // Vector conditional move of doubles: AVX1/AVX2 only (no EVEX form here).
    if (UseAVX < 1 || UseAVX > 2)
      ret_value = false;
    break;
  }

  return ret_value;  // Per default match rules are supported.
}

// Register-pressure threshold for float/vector registers used by the
// register allocator. Doubled on AVX-512 capable machines, which expose
// twice as many XMM registers (xmm0-xmm31).
const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX2/EVEX supports 512bit vectors for all types.
  // (1 << UseAVX) * 8: UseAVX==2 -> 32 bytes, UseAVX==3 -> 64 bytes.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // NOTE(review): a few source lines are not visible in this chunk at this
  // point (extraction gap).
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    // 64-byte byte vectors need full-width byte ops (AVX512BW).
    if ((size > 32) && !VM_Version::supports_avx512bw()) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    // 32/64-byte short vectors need full-width word ops (AVX512BW).
    if ((size > 16) && !VM_Version::supports_avx512bw()) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  // NOTE(review): the body of this function and the beginning of the
  // following spill helper are not visible in this chunk (extraction gap);
  // the fragment below resumes inside a debug-print switch over vector
  // register kinds.
    break;
  case Op_VecD:
    st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
    break;
  case Op_VecX:
    st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
    break;
  case Op_VecY:
  case Op_VecZ:
    st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
    break;
  default:
    ShouldNotReachHere();
  }
  }
#endif
  }
  // Compute the encoded length of a vector spill: displacement size depends
  // on whether EVEX compressed-disp8 applies (AVX-512) or a plain disp8/disp32.
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    // Ask the assembler whether the offset compresses to a one-byte disp8*N.
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2 ) {
    if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

// Build a 32-bit immediate by replicating a byte or short constant.
static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
  // NOTE(review): extraction gap -- the remainder of replicate4_imm and a
  // large span of the file are not visible here. The fragment below resumes
  // inside an AVX divide-by-constant instruct (its header is not visible).
  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Float abs, SSE-only form: clears the sign bit in place via andps with a mask.
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// Float abs, AVX form: non-destructive three-operand vandps.
instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

// Double abs, SSE-only form: clears the sign bit in place via andpd.
instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// Double abs, AVX form: non-destructive three-operand vandpd.
instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

// Float negate, SSE-only form: flips the sign bit in place via xorps.
instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// Float negate, AVX form (continuation cut off by an extraction gap below).
instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
// NOTE(review): extraction gap -- the rest of negF_reg_reg and a large span
// are not visible here. Content resumes with the integer add-reductions.

// dst = src1 + sum of 2 int lanes of src2; SSE3+ scalar form using phaddd.
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 2 int lanes of src2; AVX1/AVX2 form using vphaddd.
instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && UseAVX < 3);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t!
add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 2 int lanes of src2; EVEX form avoids vphaddd and
// uses a pshufd/vpaddd shuffle-and-add tree instead.
instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 4 int lanes of src2; SSE3+ scalar form (two phaddd folds).
instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 4 int lanes of src2; AVX1/AVX2 form (two vphaddd folds).
instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && UseAVX < 3);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 4 int lanes of src2; EVEX shuffle-and-add form.
instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 8 int lanes of src2 (256-bit); AVX2 form: fold the
// upper 128-bit half onto the lower, then vphaddd within a lane.
instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && UseAVX < 3);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextracti128 $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
// NOTE(review): extraction gap -- the closing of this instruct and the
// header/format of the following EVEX 8I reduction are not visible here.
            "movd $dst,$tmp2\t!
add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 16 int lanes of src2 (512-bit); EVEX form: fold the
// upper 256-bit half onto the lower, then the upper 128 bits, then finish
// with a pshufd/vpaddd tree on the remaining 128-bit lane.
instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4 $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128 $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction16I" %}
            // ^ fixed: format previously said "mul reduction16I" (copy-paste
            // from the mul-reduction rule) although this rule matches
            // AddReductionVI and emits vpaddd.
  ins_encode %{
    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// dst = src1 + sum of 2 long lanes of src2; EVEX form.
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t!
add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 4 long lanes of src2 (256-bit); EVEX form: fold the
// upper 128-bit lane onto the lower, then the two quadwords.
instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 8 long lanes of src2 (512-bit); EVEX form: fold the
// upper 256, then upper 128, then the two quadwords.
instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4 $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128 $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// dst = src1 + sum of 2 float lanes of src2; SSE-only form.
// NOTE(review): predicate allows UseSSE >= 1 but movdqu/pshufd are SSE2;
// harmless in practice because the matcher disables vectors when UseSSE < 2
// (see Matcher::vector_width_in_bytes) -- confirm before relying on it.
instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "addss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "addss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 2 float lanes of src2; AVX form (scalar vaddss chain,
// preserving strict left-to-right FP addition order).
instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$tmp2,$tmp\t!
add reduction2F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 4 float lanes of src2; SSE-only form: scalar addss of
// each lane in order (FP addition is not reassociated).
instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "addss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "addss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x02\n\t"
            "addss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x03\n\t"
            "addss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 4 float lanes of src2; AVX form (vaddss chain).
instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 8 float lanes of src2 (256-bit); AVX form: add the low
// 128-bit lane, extract the high lane, add its four floats in order.
instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $dst,$tmp2,$tmp\t!
add reduction8F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 16 float lanes of src2 (512-bit); EVEX form: add the
// low 128-bit lane scalar-wise, then extract and accumulate lanes 1..3.
// FP addition stays in strict lane order (no reassociation).
instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  // Fixed: the format text previously printed "vextractf64x2" although the
  // encoding emits vextractf32x4h (float 128-bit-lane extract); the debug
  // output now matches the emitted instruction.
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x1\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x2\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x3\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $dst,$tmp2,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 2 double lanes of src2; SSE-only form.
// NOTE(review): predicate allows UseSSE >= 1 but addsd/pshufd are SSE2;
// harmless in practice because the matcher disables vectors when UseSSE < 2.
instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "movdqu $tmp,$src1\n\t"
            "addsd $tmp,$src2\n\t"
            "pshufd $dst,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 2 double lanes of src2; AVX form (vaddsd chain).
instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t!
add reduction2D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 4 double lanes of src2 (256-bit); AVX form: add the
// low lane, extract the high 128-bit lane, add its two doubles in order.
instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 + sum of 8 double lanes of src2 (512-bit); EVEX form: accumulate
// 128-bit lanes 1..3 after the low lane, two doubles per lane, in order.
instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x1\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x2\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x3\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// dst = src1 * product of 2 int lanes of src2; SSE4.1+ scalar form (pmulld).
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pmulld $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
// NOTE(review): extraction gap -- the closing of this instruct and several
// intervening mul-reduction rules are not visible here; the fragment below
// is the last format line of the 8-int mul-reduction rule.
            "movd $dst,$tmp2\t!
mul reduction8I" %} 5200 ins_encode %{ 5201 int vector_len = 0; 5202 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5203 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5204 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5205 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5206 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5207 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5208 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5209 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5210 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5211 %} 5212 ins_pipe( pipe_slow ); 5213 %} 5214 5215 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5216 predicate(UseAVX > 2); 5217 match(Set dst (MulReductionVI src1 src2)); 5218 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5219 format %{ "vextracti64x4 $tmp3,$src2\n\t" 5220 "vpmulld $tmp3,$tmp3,$src2\n\t" 5221 "vextracti128 $tmp,$tmp3\n\t" 5222 "vpmulld $tmp,$tmp,$src2\n\t" 5223 "pshufd $tmp2,$tmp,0xE\n\t" 5224 "vpmulld $tmp,$tmp,$tmp2\n\t" 5225 "pshufd $tmp2,$tmp,0x1\n\t" 5226 "vpmulld $tmp,$tmp,$tmp2\n\t" 5227 "movd $tmp2,$src1\n\t" 5228 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5229 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5230 ins_encode %{ 5231 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 5232 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5233 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 5234 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5235 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5236 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5237 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5238 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5239 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5240 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5241 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5242 %} 5243 ins_pipe( pipe_slow ); 5244 %} 5245 5246 #ifdef _LP64 5247 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5248 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5249 match(Set dst (MulReductionVL src1 src2)); 5250 effect(TEMP tmp, TEMP tmp2); 5251 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5252 "vpmullq $tmp,$src2,$tmp2\n\t" 5253 "movdq $tmp2,$src1\n\t" 5254 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5255 "movdq $dst,$tmp2\t! 
mul reduction2L" %} 5256 ins_encode %{ 5257 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5258 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5259 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5260 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5261 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5262 %} 5263 ins_pipe( pipe_slow ); 5264 %} 5265 5266 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5267 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5268 match(Set dst (MulReductionVL src1 src2)); 5269 effect(TEMP tmp, TEMP tmp2); 5270 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 5271 "vpmullq $tmp2,$tmp,$src2\n\t" 5272 "pshufd $tmp,$tmp2,0xE\n\t" 5273 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5274 "movdq $tmp,$src1\n\t" 5275 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5276 "movdq $dst,$tmp2\t! mul reduction4L" %} 5277 ins_encode %{ 5278 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 5279 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5280 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5281 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5282 __ movdq($tmp$$XMMRegister, $src1$$Register); 5283 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5284 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5285 %} 5286 ins_pipe( pipe_slow ); 5287 %} 5288 5289 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5290 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5291 match(Set dst (MulReductionVL src1 src2)); 5292 effect(TEMP tmp, TEMP tmp2); 5293 format %{ "vextracti64x4 $tmp2,$src2\n\t" 5294 "vpmullq $tmp2,$tmp2,$src2\n\t" 5295 "vextracti128 $tmp,$tmp2\n\t" 5296 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5297 "pshufd $tmp,$tmp2,0xE\n\t" 5298 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5299 "movdq $tmp,$src1\n\t" 5300 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5301 "movdq 
$dst,$tmp2\t! mul reduction8L" %} 5302 ins_encode %{ 5303 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 5304 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5305 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 5306 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5307 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5308 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5309 __ movdq($tmp$$XMMRegister, $src1$$Register); 5310 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5311 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5312 %} 5313 ins_pipe( pipe_slow ); 5314 %} 5315 #endif 5316 5317 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5318 predicate(UseSSE >= 1 && UseAVX == 0); 5319 match(Set dst (MulReductionVF src1 src2)); 5320 effect(TEMP tmp, TEMP tmp2); 5321 format %{ "movdqu $tmp,$src1\n\t" 5322 "mulss $tmp,$src2\n\t" 5323 "pshufd $tmp2,$src2,0x01\n\t" 5324 "mulss $tmp,$tmp2\n\t" 5325 "movdqu $dst,$tmp\t! mul reduction2F" %} 5326 ins_encode %{ 5327 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5328 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5329 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5330 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5331 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5332 %} 5333 ins_pipe( pipe_slow ); 5334 %} 5335 5336 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5337 predicate(UseAVX > 0); 5338 match(Set dst (MulReductionVF src1 src2)); 5339 effect(TEMP tmp, TEMP tmp2); 5340 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5341 "pshufd $tmp,$src2,0x01\n\t" 5342 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction2F" %} 5343 ins_encode %{ 5344 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5345 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5346 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5347 %} 5348 ins_pipe( pipe_slow ); 5349 %} 5350 5351 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5352 predicate(UseSSE >= 1 && UseAVX == 0); 5353 match(Set dst (MulReductionVF src1 src2)); 5354 effect(TEMP tmp, TEMP tmp2); 5355 format %{ "movdqu $tmp,$src1\n\t" 5356 "mulss $tmp,$src2\n\t" 5357 "pshufd $tmp2,$src2,0x01\n\t" 5358 "mulss $tmp,$tmp2\n\t" 5359 "pshufd $tmp2,$src2,0x02\n\t" 5360 "mulss $tmp,$tmp2\n\t" 5361 "pshufd $tmp2,$src2,0x03\n\t" 5362 "mulss $tmp,$tmp2\n\t" 5363 "movdqu $dst,$tmp\t! mul reduction4F" %} 5364 ins_encode %{ 5365 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5366 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5367 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5368 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5369 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 5370 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5371 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 5372 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5373 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5374 %} 5375 ins_pipe( pipe_slow ); 5376 %} 5377 5378 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5379 predicate(UseAVX > 0); 5380 match(Set dst (MulReductionVF src1 src2)); 5381 effect(TEMP tmp, TEMP tmp2); 5382 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5383 "pshufd $tmp,$src2,0x01\n\t" 5384 "vmulss $tmp2,$tmp2,$tmp\n\t" 5385 "pshufd $tmp,$src2,0x02\n\t" 5386 "vmulss $tmp2,$tmp2,$tmp\n\t" 5387 "pshufd $tmp,$src2,0x03\n\t" 5388 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction4F" %} 5389 ins_encode %{ 5390 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5391 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5392 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5393 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5394 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5395 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5396 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5397 %} 5398 ins_pipe( pipe_slow ); 5399 %} 5400 5401 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 5402 predicate(UseAVX > 0); 5403 match(Set dst (MulReductionVF src1 src2)); 5404 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5405 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5406 "pshufd $tmp,$src2,0x01\n\t" 5407 "vmulss $tmp2,$tmp2,$tmp\n\t" 5408 "pshufd $tmp,$src2,0x02\n\t" 5409 "vmulss $tmp2,$tmp2,$tmp\n\t" 5410 "pshufd $tmp,$src2,0x03\n\t" 5411 "vmulss $tmp2,$tmp2,$tmp\n\t" 5412 "vextractf128 $tmp3,$src2\n\t" 5413 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5414 "pshufd $tmp,$tmp3,0x01\n\t" 5415 "vmulss $tmp2,$tmp2,$tmp\n\t" 5416 "pshufd $tmp,$tmp3,0x02\n\t" 5417 "vmulss $tmp2,$tmp2,$tmp\n\t" 5418 "pshufd $tmp,$tmp3,0x03\n\t" 5419 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction8F" %} 5420 ins_encode %{ 5421 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5422 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5423 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5424 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5425 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5426 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5427 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5428 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5429 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5430 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5431 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5432 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5433 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5434 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5435 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5436 %} 5437 ins_pipe( pipe_slow ); 5438 %} 5439 5440 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5441 predicate(UseAVX > 2); 5442 match(Set dst (MulReductionVF src1 src2)); 5443 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5444 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5445 "pshufd $tmp,$src2,0x01\n\t" 5446 "vmulss $tmp2,$tmp2,$tmp\n\t" 5447 "pshufd $tmp,$src2,0x02\n\t" 5448 "vmulss $tmp2,$tmp2,$tmp\n\t" 5449 "pshufd $tmp,$src2,0x03\n\t" 5450 "vmulss $tmp2,$tmp2,$tmp\n\t" 5451 "vextractf32x4 $tmp3,$src2, 0x1\n\t" 5452 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5453 "pshufd $tmp,$tmp3,0x01\n\t" 5454 "vmulss $tmp2,$tmp2,$tmp\n\t" 5455 "pshufd $tmp,$tmp3,0x02\n\t" 5456 "vmulss $tmp2,$tmp2,$tmp\n\t" 5457 "pshufd $tmp,$tmp3,0x03\n\t" 5458 "vmulss $tmp2,$tmp2,$tmp\n\t" 5459 "vextractf32x4 $tmp3,$src2, 0x2\n\t" 5460 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5461 "pshufd $tmp,$tmp3,0x01\n\t" 5462 
"vmulss $tmp2,$tmp2,$tmp\n\t" 5463 "pshufd $tmp,$tmp3,0x02\n\t" 5464 "vmulss $tmp2,$tmp2,$tmp\n\t" 5465 "pshufd $tmp,$tmp3,0x03\n\t" 5466 "vmulss $tmp2,$tmp2,$tmp\n\t" 5467 "vextractf32x4 $tmp3,$src2, 0x3\n\t" 5468 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5469 "pshufd $tmp,$tmp3,0x01\n\t" 5470 "vmulss $tmp2,$tmp2,$tmp\n\t" 5471 "pshufd $tmp,$tmp3,0x02\n\t" 5472 "vmulss $tmp2,$tmp2,$tmp\n\t" 5473 "pshufd $tmp,$tmp3,0x03\n\t" 5474 "vmulss $dst,$tmp2,$tmp\t! mul reduction16F" %} 5475 ins_encode %{ 5476 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5477 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5478 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5479 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5480 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5481 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5482 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5483 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5484 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5485 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5486 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5487 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5488 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5489 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5490 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5491 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5492 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5493 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5494 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5495 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5496 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5497 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 
0x03); 5498 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5499 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5500 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5501 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5502 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5503 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5504 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5505 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5506 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5507 %} 5508 ins_pipe( pipe_slow ); 5509 %} 5510 5511 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 5512 predicate(UseSSE >= 1 && UseAVX == 0); 5513 match(Set dst (MulReductionVD src1 src2)); 5514 effect(TEMP tmp, TEMP dst); 5515 format %{ "movdqu $tmp,$src1\n\t" 5516 "mulsd $tmp,$src2\n\t" 5517 "pshufd $dst,$src2,0xE\n\t" 5518 "mulsd $dst,$tmp\t! mul reduction2D" %} 5519 ins_encode %{ 5520 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5521 __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); 5522 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 5523 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5524 %} 5525 ins_pipe( pipe_slow ); 5526 %} 5527 5528 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 5529 predicate(UseAVX > 0); 5530 match(Set dst (MulReductionVD src1 src2)); 5531 effect(TEMP tmp, TEMP tmp2); 5532 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5533 "pshufd $tmp,$src2,0xE\n\t" 5534 "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction2D" %} 5535 ins_encode %{ 5536 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5537 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5538 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5539 %} 5540 ins_pipe( pipe_slow ); 5541 %} 5542 5543 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 5544 predicate(UseAVX > 0); 5545 match(Set dst (MulReductionVD src1 src2)); 5546 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5547 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5548 "pshufd $tmp,$src2,0xE\n\t" 5549 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5550 "vextractf128 $tmp3,$src2\n\t" 5551 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5552 "pshufd $tmp,$tmp3,0xE\n\t" 5553 "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} 5554 ins_encode %{ 5555 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5556 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5557 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5558 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5559 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5560 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5561 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5562 %} 5563 ins_pipe( pipe_slow ); 5564 %} 5565 5566 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ 5567 predicate(UseAVX > 2); 5568 match(Set dst (MulReductionVD src1 src2)); 5569 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5570 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5571 "pshufd $tmp,$src2,0xE\n\t" 5572 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5573 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 5574 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5575 "pshufd $tmp,$src2,0xE\n\t" 5576 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5577 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 5578 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5579 "pshufd $tmp,$tmp3,0xE\n\t" 5580 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5581 "vextractf64x2 
$tmp3,$src2, 0x3\n\t" 5582 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5583 "pshufd $tmp,$tmp3,0xE\n\t" 5584 "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %} 5585 ins_encode %{ 5586 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5587 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5588 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5589 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5590 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5591 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5592 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5593 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5594 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5595 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5596 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5597 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5598 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5599 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5600 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5601 %} 5602 ins_pipe( pipe_slow ); 5603 %} 5604 5605 // ====================VECTOR ARITHMETIC======================================= 5606 5607 // --------------------------------- ADD -------------------------------------- 5608 5609 // Bytes vector add 5610 instruct vadd4B(vecS dst, vecS src) %{ 5611 predicate(n->as_Vector()->length() == 4); 5612 match(Set dst (AddVB dst src)); 5613 format %{ "paddb $dst,$src\t! add packed4B" %} 5614 ins_encode %{ 5615 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5616 %} 5617 ins_pipe( pipe_slow ); 5618 %} 5619 5620 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 5621 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5622 match(Set dst (AddVB src1 src2)); 5623 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed4B" %} 5624 ins_encode %{ 5625 int vector_len = 0; 5626 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5627 %} 5628 ins_pipe( pipe_slow ); 5629 %} 5630 5631 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ 5632 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5633 match(Set dst (AddVB src (LoadVector mem))); 5634 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5635 ins_encode %{ 5636 int vector_len = 0; 5637 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5638 %} 5639 ins_pipe( pipe_slow ); 5640 %} 5641 5642 instruct vadd8B(vecD dst, vecD src) %{ 5643 predicate(n->as_Vector()->length() == 8); 5644 match(Set dst (AddVB dst src)); 5645 format %{ "paddb $dst,$src\t! add packed8B" %} 5646 ins_encode %{ 5647 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5648 %} 5649 ins_pipe( pipe_slow ); 5650 %} 5651 5652 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 5653 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5654 match(Set dst (AddVB src1 src2)); 5655 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5656 ins_encode %{ 5657 int vector_len = 0; 5658 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5659 %} 5660 ins_pipe( pipe_slow ); 5661 %} 5662 5663 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ 5664 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5665 match(Set dst (AddVB src (LoadVector mem))); 5666 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5667 ins_encode %{ 5668 int vector_len = 0; 5669 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5670 %} 5671 ins_pipe( pipe_slow ); 5672 %} 5673 5674 instruct vadd16B(vecX dst, vecX src) %{ 5675 predicate(n->as_Vector()->length() == 16); 5676 match(Set dst (AddVB dst src)); 5677 format %{ "paddb $dst,$src\t! 
add packed16B" %} 5678 ins_encode %{ 5679 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5680 %} 5681 ins_pipe( pipe_slow ); 5682 %} 5683 5684 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5685 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5686 match(Set dst (AddVB src1 src2)); 5687 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5688 ins_encode %{ 5689 int vector_len = 0; 5690 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5691 %} 5692 ins_pipe( pipe_slow ); 5693 %} 5694 5695 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 5696 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5697 match(Set dst (AddVB src (LoadVector mem))); 5698 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5699 ins_encode %{ 5700 int vector_len = 0; 5701 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5702 %} 5703 ins_pipe( pipe_slow ); 5704 %} 5705 5706 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 5707 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5708 match(Set dst (AddVB src1 src2)); 5709 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5710 ins_encode %{ 5711 int vector_len = 1; 5712 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5713 %} 5714 ins_pipe( pipe_slow ); 5715 %} 5716 5717 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 5718 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5719 match(Set dst (AddVB src (LoadVector mem))); 5720 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5721 ins_encode %{ 5722 int vector_len = 1; 5723 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5724 %} 5725 ins_pipe( pipe_slow ); 5726 %} 5727 5728 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5729 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5730 match(Set dst (AddVB src1 src2)); 5731 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed64B" %} 5732 ins_encode %{ 5733 int vector_len = 2; 5734 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5735 %} 5736 ins_pipe( pipe_slow ); 5737 %} 5738 5739 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5740 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5741 match(Set dst (AddVB src (LoadVector mem))); 5742 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5743 ins_encode %{ 5744 int vector_len = 2; 5745 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5746 %} 5747 ins_pipe( pipe_slow ); 5748 %} 5749 5750 // Shorts/Chars vector add 5751 instruct vadd2S(vecS dst, vecS src) %{ 5752 predicate(n->as_Vector()->length() == 2); 5753 match(Set dst (AddVS dst src)); 5754 format %{ "paddw $dst,$src\t! add packed2S" %} 5755 ins_encode %{ 5756 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5757 %} 5758 ins_pipe( pipe_slow ); 5759 %} 5760 5761 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 5762 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5763 match(Set dst (AddVS src1 src2)); 5764 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5765 ins_encode %{ 5766 int vector_len = 0; 5767 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5768 %} 5769 ins_pipe( pipe_slow ); 5770 %} 5771 5772 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ 5773 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5774 match(Set dst (AddVS src (LoadVector mem))); 5775 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5776 ins_encode %{ 5777 int vector_len = 0; 5778 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5779 %} 5780 ins_pipe( pipe_slow ); 5781 %} 5782 5783 instruct vadd4S(vecD dst, vecD src) %{ 5784 predicate(n->as_Vector()->length() == 4); 5785 match(Set dst (AddVS dst src)); 5786 format %{ "paddw $dst,$src\t! 
add packed4S" %} 5787 ins_encode %{ 5788 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5789 %} 5790 ins_pipe( pipe_slow ); 5791 %} 5792 5793 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 5794 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5795 match(Set dst (AddVS src1 src2)); 5796 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5797 ins_encode %{ 5798 int vector_len = 0; 5799 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5800 %} 5801 ins_pipe( pipe_slow ); 5802 %} 5803 5804 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ 5805 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5806 match(Set dst (AddVS src (LoadVector mem))); 5807 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 5808 ins_encode %{ 5809 int vector_len = 0; 5810 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5811 %} 5812 ins_pipe( pipe_slow ); 5813 %} 5814 5815 instruct vadd8S(vecX dst, vecX src) %{ 5816 predicate(n->as_Vector()->length() == 8); 5817 match(Set dst (AddVS dst src)); 5818 format %{ "paddw $dst,$src\t! add packed8S" %} 5819 ins_encode %{ 5820 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5821 %} 5822 ins_pipe( pipe_slow ); 5823 %} 5824 5825 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 5826 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5827 match(Set dst (AddVS src1 src2)); 5828 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5829 ins_encode %{ 5830 int vector_len = 0; 5831 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5832 %} 5833 ins_pipe( pipe_slow ); 5834 %} 5835 5836 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 5837 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5838 match(Set dst (AddVS src (LoadVector mem))); 5839 format %{ "vpaddw $dst,$src,$mem\t! 
add packed8S" %} 5840 ins_encode %{ 5841 int vector_len = 0; 5842 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5843 %} 5844 ins_pipe( pipe_slow ); 5845 %} 5846 5847 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 5848 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5849 match(Set dst (AddVS src1 src2)); 5850 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 5851 ins_encode %{ 5852 int vector_len = 1; 5853 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5854 %} 5855 ins_pipe( pipe_slow ); 5856 %} 5857 5858 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 5859 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5860 match(Set dst (AddVS src (LoadVector mem))); 5861 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 5862 ins_encode %{ 5863 int vector_len = 1; 5864 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5865 %} 5866 ins_pipe( pipe_slow ); 5867 %} 5868 5869 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5870 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5871 match(Set dst (AddVS src1 src2)); 5872 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} 5873 ins_encode %{ 5874 int vector_len = 2; 5875 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5876 %} 5877 ins_pipe( pipe_slow ); 5878 %} 5879 5880 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 5881 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5882 match(Set dst (AddVS src (LoadVector mem))); 5883 format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} 5884 ins_encode %{ 5885 int vector_len = 2; 5886 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5887 %} 5888 ins_pipe( pipe_slow ); 5889 %} 5890 5891 // Integers vector add 5892 instruct vadd2I(vecD dst, vecD src) %{ 5893 predicate(n->as_Vector()->length() == 2); 5894 match(Set dst (AddVI dst src)); 5895 format %{ "paddd $dst,$src\t! 
add packed2I" %} 5896 ins_encode %{ 5897 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5898 %} 5899 ins_pipe( pipe_slow ); 5900 %} 5901 5902 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 5903 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5904 match(Set dst (AddVI src1 src2)); 5905 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 5906 ins_encode %{ 5907 int vector_len = 0; 5908 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5909 %} 5910 ins_pipe( pipe_slow ); 5911 %} 5912 5913 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 5914 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5915 match(Set dst (AddVI src (LoadVector mem))); 5916 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 5917 ins_encode %{ 5918 int vector_len = 0; 5919 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5920 %} 5921 ins_pipe( pipe_slow ); 5922 %} 5923 5924 instruct vadd4I(vecX dst, vecX src) %{ 5925 predicate(n->as_Vector()->length() == 4); 5926 match(Set dst (AddVI dst src)); 5927 format %{ "paddd $dst,$src\t! add packed4I" %} 5928 ins_encode %{ 5929 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5930 %} 5931 ins_pipe( pipe_slow ); 5932 %} 5933 5934 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 5935 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5936 match(Set dst (AddVI src1 src2)); 5937 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 5938 ins_encode %{ 5939 int vector_len = 0; 5940 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5941 %} 5942 ins_pipe( pipe_slow ); 5943 %} 5944 5945 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 5946 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5947 match(Set dst (AddVI src (LoadVector mem))); 5948 format %{ "vpaddd $dst,$src,$mem\t! 
add packed4I" %} 5949 ins_encode %{ 5950 int vector_len = 0; 5951 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5952 %} 5953 ins_pipe( pipe_slow ); 5954 %} 5955 5956 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 5957 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5958 match(Set dst (AddVI src1 src2)); 5959 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 5960 ins_encode %{ 5961 int vector_len = 1; 5962 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 5968 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5969 match(Set dst (AddVI src (LoadVector mem))); 5970 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 5971 ins_encode %{ 5972 int vector_len = 1; 5973 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5974 %} 5975 ins_pipe( pipe_slow ); 5976 %} 5977 5978 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5979 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5980 match(Set dst (AddVI src1 src2)); 5981 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} 5982 ins_encode %{ 5983 int vector_len = 2; 5984 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5985 %} 5986 ins_pipe( pipe_slow ); 5987 %} 5988 5989 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 5990 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5991 match(Set dst (AddVI src (LoadVector mem))); 5992 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 5993 ins_encode %{ 5994 int vector_len = 2; 5995 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5996 %} 5997 ins_pipe( pipe_slow ); 5998 %} 5999 6000 // Longs vector add 6001 instruct vadd2L(vecX dst, vecX src) %{ 6002 predicate(n->as_Vector()->length() == 2); 6003 match(Set dst (AddVL dst src)); 6004 format %{ "paddq $dst,$src\t! 
// --- tail of vadd2L (SSE two-operand long add); header is on the
// --- previous chunk line ---
add packed2L" %}
  ins_encode %{ __ paddq($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

// Long (64-bit lane) vector add: AVX three-operand register and memory
// forms. vector_len: 0 = 128-bit (vecX), 1 = 256-bit (vecY, AVX2),
// 2 = 512-bit (vecZ, EVEX).
instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --- head of vadd8L_reg; format string continues on the next chunk line ---
instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t!
// --- tail of vadd8L_reg (512-bit long add); header is on the previous
// --- chunk line ---
add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add.
// Note: unlike the integer forms, the 256-bit float forms only need
// UseAVX > 0 — the visible predicates distinguish FP (AVX1) from
// integer (AVX2) 256-bit operations.
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{ __ addps($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{ __ addps($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --- head of vadd16F_reg; format string continues on the next chunk line ---
instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t!
// --- tail of vadd16F_reg (512-bit float add); header is on the previous
// --- chunk line ---
add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add. 256-bit FP forms only need UseAVX > 0 (see note
// at the float section); 512-bit needs UseAVX > 2.
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{ __ addpd($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
// --- head of vsub4B; format string continues on the next chunk line ---
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t!
// --- tail of vsub4B (SSE two-operand byte sub); header is on the
// --- previous chunk line ---
sub packed4B" %}
  ins_encode %{ __ psubb($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

// Byte (8-bit lane) vector sub. vector_len: 0 for <=128-bit operands
// (vecS/vecD/vecX), 1 for vecY (AVX2), 2 for vecZ (EVEX). 512-bit byte
// operations additionally depend on AVX512BW — the size limit is
// enforced centrally in Matcher::vector_width_in_bytes (see head of
// file), not in these predicates.
instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{ __ psubb($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{ __ psubb($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --- head of vsub2S_mem; format string continues on the next chunk line ---
instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t!
// --- tail of vsub2S_mem; header is on the previous chunk line ---
sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Short/char (16-bit lane) vector sub, continued. vector_len: 0 for
// <=128-bit, 1 for vecY (AVX2), 2 for vecZ (EVEX).
instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --- head of vsub32S_reg; format string continues on the next chunk line ---
instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t!
sub packed32S" %} 6529 ins_encode %{ 6530 int vector_len = 2; 6531 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6532 %} 6533 ins_pipe( pipe_slow ); 6534 %} 6535 6536 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 6537 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6538 match(Set dst (SubVS src (LoadVector mem))); 6539 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 6540 ins_encode %{ 6541 int vector_len = 2; 6542 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6543 %} 6544 ins_pipe( pipe_slow ); 6545 %} 6546 6547 // Integers vector sub 6548 instruct vsub2I(vecD dst, vecD src) %{ 6549 predicate(n->as_Vector()->length() == 2); 6550 match(Set dst (SubVI dst src)); 6551 format %{ "psubd $dst,$src\t! sub packed2I" %} 6552 ins_encode %{ 6553 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6554 %} 6555 ins_pipe( pipe_slow ); 6556 %} 6557 6866 match(Set dst (SubVD src (LoadVector mem))); 6867 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 6868 ins_encode %{ 6869 int vector_len = 0; 6870 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6871 %} 6872 ins_pipe( pipe_slow ); 6873 %} 6874 6875 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 6876 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6877 match(Set dst (SubVD src1 src2)); 6878 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 6879 ins_encode %{ 6880 int vector_len = 1; 6881 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6882 %} 6883 ins_pipe( pipe_slow ); 6884 %} 6885 6886 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 6887 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6888 match(Set dst (SubVD src (LoadVector mem))); 6889 format %{ "vsubpd $dst,$src,$mem\t! 
// --- tail of vsub4D_mem (256-bit double sub with memory source); its
// --- header lies in an elided span above ---
sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

// --- head of vmul2S_reg; format string continues on the next chunk line ---
instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t!
// --- tail of vmul2S_reg; header is on the previous chunk line ---
mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Short/char (16-bit lane) vector multiply (low 16 bits of each
// product, pmullw/vpmullw). vector_len: 0 for <=128-bit, 1 for vecY
// (AVX2), 2 for vecZ (EVEX).
instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --- head of vmul32S_reg; format string continues on the next chunk line ---
instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t!
mul packed32S" %} 7044 ins_encode %{ 7045 int vector_len = 2; 7046 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7047 %} 7048 ins_pipe( pipe_slow ); 7049 %} 7050 7051 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7052 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 7053 match(Set dst (MulVS src (LoadVector mem))); 7054 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7055 ins_encode %{ 7056 int vector_len = 2; 7057 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7058 %} 7059 ins_pipe( pipe_slow ); 7060 %} 7061 7062 // Integers vector mul (sse4_1) 7063 instruct vmul2I(vecD dst, vecD src) %{ 7064 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7065 match(Set dst (MulVI dst src)); 7066 format %{ "pmulld $dst,$src\t! mul packed2I" %} 7067 ins_encode %{ 7068 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7069 %} 7070 ins_pipe( pipe_slow ); 7071 %} 7072 7662 %} 7663 ins_pipe( pipe_slow ); 7664 %} 7665 7666 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 7667 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7668 match(Set dst (SqrtVD src)); 7669 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 7670 ins_encode %{ 7671 int vector_len = 1; 7672 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 7673 %} 7674 ins_pipe( pipe_slow ); 7675 %} 7676 7677 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 7678 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7679 match(Set dst (SqrtVD (LoadVector mem))); 7680 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 7681 ins_encode %{ 7682 int vector_len = 1; 7683 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 7684 %} 7685 ins_pipe( pipe_slow ); 7686 %} 7687 7688 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 7689 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7690 match(Set dst (SqrtVD src)); 7691 format %{ "vsqrtpd $dst,$src\t! 
// --- tail of vsqrt8D_reg (512-bit double sqrt); its header lies in an
// --- elided span above ---
sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift.
// The shift count is either a vecS operand (count taken from an XMM
// register — the same vecS operand type is used for every vector width)
// or an 8-bit immediate (the _imm forms, which encode the constant
// directly).
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --- head of vsll16S_reg_imm; format string continues on the next chunk line ---
instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t!
left shift packed16S" %} 7854 ins_encode %{ 7855 int vector_len = 1; 7856 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7857 %} 7858 ins_pipe( pipe_slow ); 7859 %} 7860 7861 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 7862 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 7863 match(Set dst (LShiftVS src shift)); 7864 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 7865 ins_encode %{ 7866 int vector_len = 2; 7867 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7868 %} 7869 ins_pipe( pipe_slow ); 7870 %} 7871 7872 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 7873 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 7874 match(Set dst (LShiftVS src shift)); 7875 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 7876 ins_encode %{ 7877 int vector_len = 2; 7878 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7879 %} 7880 ins_pipe( pipe_slow ); 7881 %} 7882 7883 // Integers vector left shift 7884 instruct vsll2I(vecD dst, vecS shift) %{ 7885 predicate(n->as_Vector()->length() == 2); 7886 match(Set dst (LShiftVI dst shift)); 7887 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 7888 ins_encode %{ 7889 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 7890 %} 7891 ins_pipe( pipe_slow ); 7892 %} 7893 8062 %} 8063 ins_pipe( pipe_slow ); 8064 %} 8065 8066 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8067 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8068 match(Set dst (LShiftVL src shift)); 8069 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed4L" %} 8070 ins_encode %{ 8071 int vector_len = 1; 8072 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8073 %} 8074 ins_pipe( pipe_slow ); 8075 %} 8076 8077 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 8078 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8079 match(Set dst (LShiftVL src shift)); 8080 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 8081 ins_encode %{ 8082 int vector_len = 2; 8083 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8084 %} 8085 ins_pipe( pipe_slow ); 8086 %} 8087 8088 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8089 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8090 match(Set dst (LShiftVL src shift)); 8091 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 8092 ins_encode %{ 8093 int vector_len = 2; 8094 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8095 %} 8096 ins_pipe( pipe_slow ); 8097 %} 8098 8099 // ----------------------- LogicalRightShift ----------------------------------- 8100 8101 // Shorts vector logical right shift produces incorrect Java result 8102 // for negative data because java code convert short value into int with 8103 // sign extension before a shift. But char vectors are fine since chars are 8104 // unsigned values. 8105 8106 instruct vsrl2S(vecS dst, vecS shift) %{ 8107 predicate(n->as_Vector()->length() == 2); 8108 match(Set dst (URShiftVS dst shift)); 8109 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 8110 ins_encode %{ 8111 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8112 %} 8113 ins_pipe( pipe_slow ); 8114 %} 8115 8116 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 8117 predicate(n->as_Vector()->length() == 2); 8118 match(Set dst (URShiftVS dst shift)); 8119 format %{ "psrlw $dst,$shift\t! 
logical right shift packed2S" %} 8120 ins_encode %{ 8121 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8122 %} 8123 ins_pipe( pipe_slow ); 8124 %} 8125 8126 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 8127 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8128 match(Set dst (URShiftVS src shift)); 8129 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 8130 ins_encode %{ 8131 int vector_len = 0; 8132 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8133 %} 8134 ins_pipe( pipe_slow ); 8135 %} 8136 8137 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8138 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8139 match(Set dst (URShiftVS src shift)); 8140 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 8141 ins_encode %{ 8142 int vector_len = 0; 8143 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8144 %} 8145 ins_pipe( pipe_slow ); 8146 %} 8147 8148 instruct vsrl4S(vecD dst, vecS shift) %{ 8149 predicate(n->as_Vector()->length() == 4); 8150 match(Set dst (URShiftVS dst shift)); 8151 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 8152 ins_encode %{ 8153 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8154 %} 8155 ins_pipe( pipe_slow ); 8156 %} 8157 8158 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 8159 predicate(n->as_Vector()->length() == 4); 8160 match(Set dst (URShiftVS dst shift)); 8161 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 8162 ins_encode %{ 8163 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8164 %} 8165 ins_pipe( pipe_slow ); 8166 %} 8167 8168 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 8169 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8170 match(Set dst (URShiftVS src shift)); 8171 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} 8172 ins_encode %{ 8173 int vector_len = 0; 8174 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8175 %} 8176 ins_pipe( pipe_slow ); 8177 %} 8178 8179 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8180 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8181 match(Set dst (URShiftVS src shift)); 8182 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 8183 ins_encode %{ 8184 int vector_len = 0; 8185 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8186 %} 8187 ins_pipe( pipe_slow ); 8188 %} 8189 8190 instruct vsrl8S(vecX dst, vecS shift) %{ 8191 predicate(n->as_Vector()->length() == 8); 8192 match(Set dst (URShiftVS dst shift)); 8193 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 8194 ins_encode %{ 8195 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8196 %} 8197 ins_pipe( pipe_slow ); 8198 %} 8199 8200 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 8201 predicate(n->as_Vector()->length() == 8); 8202 match(Set dst (URShiftVS dst shift)); 8203 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 8204 ins_encode %{ 8205 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8206 %} 8207 ins_pipe( pipe_slow ); 8208 %} 8209 8210 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 8211 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8212 match(Set dst (URShiftVS src shift)); 8213 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 8214 ins_encode %{ 8215 int vector_len = 0; 8216 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8217 %} 8218 ins_pipe( pipe_slow ); 8219 %} 8220 8221 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8222 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8223 match(Set dst (URShiftVS src shift)); 8224 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 8225 ins_encode %{ 8226 int vector_len = 0; 8227 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8228 %} 8229 ins_pipe( pipe_slow ); 8230 %} 8231 8232 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 8233 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8234 match(Set dst (URShiftVS src shift)); 8235 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 8236 ins_encode %{ 8237 int vector_len = 1; 8238 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8239 %} 8240 ins_pipe( pipe_slow ); 8241 %} 8242 8243 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8244 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8245 match(Set dst (URShiftVS src shift)); 8246 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 8247 ins_encode %{ 8248 int vector_len = 1; 8249 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8250 %} 8251 ins_pipe( pipe_slow ); 8252 %} 8253 8254 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8255 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8256 match(Set dst (URShiftVS src shift)); 8257 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8258 ins_encode %{ 8259 int vector_len = 2; 8260 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8261 %} 8262 ins_pipe( pipe_slow ); 8263 %} 8264 8265 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8266 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8267 match(Set dst (URShiftVS src shift)); 8268 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed32S" %} 8269 ins_encode %{ 8270 int vector_len = 2; 8271 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8272 %} 8273 ins_pipe( pipe_slow ); 8274 %} 8275 8276 // Integers vector logical right shift 8277 instruct vsrl2I(vecD dst, vecS shift) %{ 8278 predicate(n->as_Vector()->length() == 2); 8279 match(Set dst (URShiftVI dst shift)); 8280 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8281 ins_encode %{ 8282 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8283 %} 8284 ins_pipe( pipe_slow ); 8285 %} 8286 8476 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8477 %} 8478 ins_pipe( pipe_slow ); 8479 %} 8480 8481 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8482 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8483 match(Set dst (URShiftVL src shift)); 8484 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8485 ins_encode %{ 8486 int vector_len = 2; 8487 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8488 %} 8489 ins_pipe( pipe_slow ); 8490 %} 8491 8492 // ------------------- ArithmeticRightShift ----------------------------------- 8493 8494 // Shorts/Chars vector arithmetic right shift 8495 instruct vsra2S(vecS dst, vecS shift) %{ 8496 predicate(n->as_Vector()->length() == 2); 8497 match(Set dst (RShiftVS dst shift)); 8498 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 8499 ins_encode %{ 8500 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8501 %} 8502 ins_pipe( pipe_slow ); 8503 %} 8504 8505 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 8506 predicate(n->as_Vector()->length() == 2); 8507 match(Set dst (RShiftVS dst shift)); 8508 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %} 8509 ins_encode %{ 8510 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8511 %} 8512 ins_pipe( pipe_slow ); 8513 %} 8514 8515 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 8516 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8517 match(Set dst (RShiftVS src shift)); 8518 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 8519 ins_encode %{ 8520 int vector_len = 0; 8521 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8522 %} 8523 ins_pipe( pipe_slow ); 8524 %} 8525 8526 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8527 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8528 match(Set dst (RShiftVS src shift)); 8529 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 8530 ins_encode %{ 8531 int vector_len = 0; 8532 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8533 %} 8534 ins_pipe( pipe_slow ); 8535 %} 8536 8537 instruct vsra4S(vecD dst, vecS shift) %{ 8538 predicate(n->as_Vector()->length() == 4); 8539 match(Set dst (RShiftVS dst shift)); 8540 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 8541 ins_encode %{ 8542 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8543 %} 8544 ins_pipe( pipe_slow ); 8545 %} 8546 8547 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 8548 predicate(n->as_Vector()->length() == 4); 8549 match(Set dst (RShiftVS dst shift)); 8550 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 8551 ins_encode %{ 8552 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8553 %} 8554 ins_pipe( pipe_slow ); 8555 %} 8556 8557 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 8558 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8559 match(Set dst (RShiftVS src shift)); 8560 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %} 8561 ins_encode %{ 8562 int vector_len = 0; 8563 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8564 %} 8565 ins_pipe( pipe_slow ); 8566 %} 8567 8568 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8569 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8570 match(Set dst (RShiftVS src shift)); 8571 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 8572 ins_encode %{ 8573 int vector_len = 0; 8574 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8575 %} 8576 ins_pipe( pipe_slow ); 8577 %} 8578 8579 instruct vsra8S(vecX dst, vecS shift) %{ 8580 predicate(n->as_Vector()->length() == 8); 8581 match(Set dst (RShiftVS dst shift)); 8582 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 8583 ins_encode %{ 8584 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8585 %} 8586 ins_pipe( pipe_slow ); 8587 %} 8588 8589 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 8590 predicate(n->as_Vector()->length() == 8); 8591 match(Set dst (RShiftVS dst shift)); 8592 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 8593 ins_encode %{ 8594 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8595 %} 8596 ins_pipe( pipe_slow ); 8597 %} 8598 8599 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 8600 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8601 match(Set dst (RShiftVS src shift)); 8602 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 8603 ins_encode %{ 8604 int vector_len = 0; 8605 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8606 %} 8607 ins_pipe( pipe_slow ); 8608 %} 8609 8610 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8611 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8612 match(Set dst (RShiftVS src shift)); 8613 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed8S" %} 8614 ins_encode %{ 8615 int vector_len = 0; 8616 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8617 %} 8618 ins_pipe( pipe_slow ); 8619 %} 8620 8621 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 8622 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8623 match(Set dst (RShiftVS src shift)); 8624 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 8625 ins_encode %{ 8626 int vector_len = 1; 8627 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8628 %} 8629 ins_pipe( pipe_slow ); 8630 %} 8631 8632 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8633 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8634 match(Set dst (RShiftVS src shift)); 8635 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 8636 ins_encode %{ 8637 int vector_len = 1; 8638 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8639 %} 8640 ins_pipe( pipe_slow ); 8641 %} 8642 8643 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8644 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8645 match(Set dst (RShiftVS src shift)); 8646 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 8647 ins_encode %{ 8648 int vector_len = 2; 8649 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8650 %} 8651 ins_pipe( pipe_slow ); 8652 %} 8653 8654 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8655 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8656 match(Set dst (RShiftVS src shift)); 8657 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed32S" %} 8658 ins_encode %{ 8659 int vector_len = 2; 8660 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8661 %} 8662 ins_pipe( pipe_slow ); 8663 %} 8664 8665 // Integers vector arithmetic right shift 8666 instruct vsra2I(vecD dst, vecS shift) %{ 8667 predicate(n->as_Vector()->length() == 2); 8668 match(Set dst (RShiftVI dst shift)); 8669 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 8670 ins_encode %{ 8671 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 8672 %} 8673 ins_pipe( pipe_slow ); 8674 %} 8675 | 1699 case Op_SqrtVD: 1700 if (UseAVX < 1) // enabled for AVX only 1701 ret_value = false; 1702 break; 1703 case Op_CompareAndSwapL: 1704 #ifdef _LP64 1705 case Op_CompareAndSwapP: 1706 #endif 1707 if (!VM_Version::supports_cx8()) 1708 ret_value = false; 1709 break; 1710 case Op_CMoveVD: 1711 if (UseAVX < 1 || UseAVX > 2) 1712 ret_value = false; 1713 break; 1714 } 1715 1716 return ret_value; // Per default match rules are supported. 1717 } 1718 1719 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1720 // identify extra cases that we might want to provide match rules for 1721 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen 1722 bool ret_value = match_rule_supported(opcode); 1723 if (ret_value) { 1724 switch (opcode) { 1725 case Op_AddVB: 1726 case Op_SubVB: 1727 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1728 ret_value = false; 1729 break; 1730 case Op_URShiftVS: 1731 case Op_RShiftVS: 1732 case Op_LShiftVS: 1733 case Op_MulVS: 1734 case Op_AddVS: 1735 case Op_SubVS: 1736 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1737 ret_value = false; 1738 break; 1739 case Op_CMoveVD: 1740 if (vlen != 4) 1741 ret_value = false; 1742 break; 1743 } 1744 } 1745 1746 return ret_value; // Per default match rules are supported. 
1747 } 1748 1749 const int Matcher::float_pressure(int default_pressure_threshold) { 1750 int float_pressure_threshold = default_pressure_threshold; 1751 #ifdef _LP64 1752 if (UseAVX > 2) { 1753 // Increase pressure threshold on machines with AVX3 which have 1754 // 2x more XMM registers. 1755 float_pressure_threshold = default_pressure_threshold * 2; 1756 } 1757 #endif 1758 return float_pressure_threshold; 1759 } 1760 1761 // Max vector size in bytes. 0 if not supported. 1762 const int Matcher::vector_width_in_bytes(BasicType bt) { 1763 assert(is_java_primitive(bt), "only primitive type vectors"); 1764 if (UseSSE < 2) return 0; 1765 // SSE2 supports 128bit vectors for all types. 1766 // AVX2 supports 256bit vectors for all types. 1767 // AVX2/EVEX supports 512bit vectors for all types. 1768 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1772 // Use flag to limit vector size. 1773 size = MIN2(size,(int)MaxVectorSize); 1774 // Minimum 2 values in vector (or 4 for bytes). 1775 switch (bt) { 1776 case T_DOUBLE: 1777 case T_LONG: 1778 if (size < 16) return 0; 1779 break; 1780 case T_FLOAT: 1781 case T_INT: 1782 if (size < 8) return 0; 1783 break; 1784 case T_BOOLEAN: 1785 if (size < 4) return 0; 1786 break; 1787 case T_CHAR: 1788 if (size < 4) return 0; 1789 break; 1790 case T_BYTE: 1791 if (size < 4) return 0; 1792 break; 1793 case T_SHORT: 1794 if (size < 4) return 0; 1795 break; 1796 default: 1797 ShouldNotReachHere(); 1798 } 1799 return size; 1800 } 1801 1802 // Limits on vector size (number of elements) loaded into vector. 1803 const int Matcher::max_vector_size(const BasicType bt) { 1804 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1805 } 1806 const int Matcher::min_vector_size(const BasicType bt) { 1807 int max_size = max_vector_size(bt); 1808 // Min size which can be loaded into vector is 4 bytes. 1809 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 1810 return MIN2(size,max_size); 1811 } 1812 1813 // Vector ideal reg corresponding to specidied size in bytes 1814 const int Matcher::vector_ideal_reg(int size) { 1978 break; 1979 case Op_VecD: 1980 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1981 break; 1982 case Op_VecX: 1983 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1984 break; 1985 case Op_VecY: 1986 case Op_VecZ: 1987 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1988 break; 1989 default: 1990 ShouldNotReachHere(); 1991 } 1992 } 1993 #endif 1994 } 1995 bool is_single_byte = false; 1996 int vec_len = 0; 1997 if ((UseAVX > 2) && (stack_offset != 0)) { 1998 int tuple_type = Assembler::EVEX_FVM; 1999 int input_size = Assembler::EVEX_32bit; 2000 switch (ireg) { 2001 case Op_VecS: 2002 tuple_type = Assembler::EVEX_T1S; 2003 break; 2004 case Op_VecD: 2005 tuple_type = Assembler::EVEX_T1S; 2006 input_size = Assembler::EVEX_64bit; 2007 break; 2008 case Op_VecX: 2009 break; 2010 case Op_VecY: 2011 vec_len = 1; 2012 break; 2013 case Op_VecZ: 2014 vec_len = 2; 2015 break; 2016 } 2017 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2018 } 2019 int offset_size = 0; 2020 int size = 5; 2021 if (UseAVX > 2 ) { 2022 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2023 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2024 size += 2; // Need an additional two bytes for EVEX encoding 2025 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2026 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2027 } else { 2028 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2029 size += 2; // Need an additional two bytes for EVEX encodding 2030 } 2031 } else { 2032 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 
1 : 4); 2033 } 2034 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2035 return size+offset_size; 2036 } 2037 2038 static inline jfloat replicate4_imm(int con, int width) { 2039 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2040 assert(width == 1 || width == 2, "only byte or short types here"); 2041 int bit_width = width * 8; 2042 jint val = con; 2043 val &= (1 << bit_width) - 1; // mask off sign bits 2044 while(bit_width < 32) { 2045 val |= (val << bit_width); 2729 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2730 ins_cost(150); 2731 ins_encode %{ 2732 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2733 %} 2734 ins_pipe(pipe_slow); 2735 %} 2736 2737 instruct absF_reg(regF dst) %{ 2738 predicate((UseSSE>=1) && (UseAVX == 0)); 2739 match(Set dst (AbsF dst)); 2740 ins_cost(150); 2741 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2742 ins_encode %{ 2743 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2744 %} 2745 ins_pipe(pipe_slow); 2746 %} 2747 2748 instruct absF_reg_reg(regF dst, regF src) %{ 2749 predicate(VM_Version::supports_avxonly()); 2750 match(Set dst (AbsF src)); 2751 ins_cost(150); 2752 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2753 ins_encode %{ 2754 int vector_len = 0; 2755 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2756 ExternalAddress(float_signmask()), vector_len); 2757 %} 2758 ins_pipe(pipe_slow); 2759 %} 2760 2761 #ifdef _LP64 2762 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2763 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2764 match(Set dst (AbsF src)); 2765 ins_cost(150); 2766 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2767 ins_encode %{ 2768 int vector_len = 0; 2769 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2770 ExternalAddress(float_signmask()), vector_len); 2771 %} 2772 
ins_pipe(pipe_slow); 2773 %} 2774 2775 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ 2776 predicate(VM_Version::supports_avx512novl()); 2777 match(Set dst (AbsF src1)); 2778 effect(TEMP src2); 2779 ins_cost(150); 2780 format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} 2781 ins_encode %{ 2782 int vector_len = 0; 2783 __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2784 ExternalAddress(float_signmask()), vector_len); 2785 %} 2786 ins_pipe(pipe_slow); 2787 %} 2788 #else // _LP64 2789 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2790 predicate(UseAVX > 2); 2791 match(Set dst (AbsF src)); 2792 ins_cost(150); 2793 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2794 ins_encode %{ 2795 int vector_len = 0; 2796 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2797 ExternalAddress(float_signmask()), vector_len); 2798 %} 2799 ins_pipe(pipe_slow); 2800 %} 2801 #endif 2802 2803 instruct absD_reg(regD dst) %{ 2804 predicate((UseSSE>=2) && (UseAVX == 0)); 2805 match(Set dst (AbsD dst)); 2806 ins_cost(150); 2807 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2808 "# abs double by sign masking" %} 2809 ins_encode %{ 2810 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2811 %} 2812 ins_pipe(pipe_slow); 2813 %} 2814 2815 instruct absD_reg_reg(regD dst, regD src) %{ 2816 predicate(VM_Version::supports_avxonly()); 2817 match(Set dst (AbsD src)); 2818 ins_cost(150); 2819 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2820 "# abs double by sign masking" %} 2821 ins_encode %{ 2822 int vector_len = 0; 2823 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2824 ExternalAddress(double_signmask()), vector_len); 2825 %} 2826 ins_pipe(pipe_slow); 2827 %} 2828 2829 #ifdef _LP64 2830 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2831 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2832 match(Set dst (AbsD src)); 2833 ins_cost(150); 2834 format %{ 
"vandpd $dst, $src, [0x7fffffffffffffff]\t" 2835 "# abs double by sign masking" %} 2836 ins_encode %{ 2837 int vector_len = 0; 2838 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2839 ExternalAddress(double_signmask()), vector_len); 2840 %} 2841 ins_pipe(pipe_slow); 2842 %} 2843 2844 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ 2845 predicate(VM_Version::supports_avx512novl()); 2846 match(Set dst (AbsD src1)); 2847 effect(TEMP src2); 2848 ins_cost(150); 2849 format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %} 2850 ins_encode %{ 2851 int vector_len = 0; 2852 __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2853 ExternalAddress(double_signmask()), vector_len); 2854 %} 2855 ins_pipe(pipe_slow); 2856 %} 2857 #else // _LP64 2858 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2859 predicate(UseAVX > 2); 2860 match(Set dst (AbsD src)); 2861 ins_cost(150); 2862 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2863 "# abs double by sign masking" %} 2864 ins_encode %{ 2865 int vector_len = 0; 2866 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2867 ExternalAddress(double_signmask()), vector_len); 2868 %} 2869 ins_pipe(pipe_slow); 2870 %} 2871 #endif 2872 2873 instruct negF_reg(regF dst) %{ 2874 predicate((UseSSE>=1) && (UseAVX == 0)); 2875 match(Set dst (NegF dst)); 2876 ins_cost(150); 2877 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2878 ins_encode %{ 2879 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2880 %} 2881 ins_pipe(pipe_slow); 2882 %} 2883 2884 instruct negF_reg_reg(regF dst, regF src) %{ 2885 predicate(UseAVX > 0); 2886 match(Set dst (NegF src)); 2887 ins_cost(150); 2888 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2889 ins_encode %{ 2890 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2891 ExternalAddress(float_signflip())); 4658 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD 
src2, regF tmp, regF tmp2) %{ 4659 predicate(UseSSE > 2 && UseAVX == 0); 4660 match(Set dst (AddReductionVI src1 src2)); 4661 effect(TEMP tmp2, TEMP tmp); 4662 format %{ "movdqu $tmp2,$src2\n\t" 4663 "phaddd $tmp2,$tmp2\n\t" 4664 "movd $tmp,$src1\n\t" 4665 "paddd $tmp,$tmp2\n\t" 4666 "movd $dst,$tmp\t! add reduction2I" %} 4667 ins_encode %{ 4668 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4669 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4670 __ movdl($tmp$$XMMRegister, $src1$$Register); 4671 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4672 __ movdl($dst$$Register, $tmp$$XMMRegister); 4673 %} 4674 ins_pipe( pipe_slow ); 4675 %} 4676 4677 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4678 predicate(VM_Version::supports_avxonly()); 4679 match(Set dst (AddReductionVI src1 src2)); 4680 effect(TEMP tmp, TEMP tmp2); 4681 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4682 "movd $tmp2,$src1\n\t" 4683 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4684 "movd $dst,$tmp2\t! add reduction2I" %} 4685 ins_encode %{ 4686 int vector_len = 0; 4687 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4688 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4689 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4690 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4691 %} 4692 ins_pipe( pipe_slow ); 4693 %} 4694 4695 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4696 predicate(UseAVX > 2); 4697 match(Set dst (AddReductionVI src1 src2)); 4698 effect(TEMP tmp, TEMP tmp2); 4699 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4700 "vpaddd $tmp,$src2,$tmp2\n\t" 4701 "movd $tmp2,$src1\n\t" 4702 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4703 "movd $dst,$tmp2\t! 
add reduction2I" %} 4704 ins_encode %{ 4705 int vector_len = 0; 4706 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4707 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4708 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4709 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4710 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4711 %} 4712 ins_pipe( pipe_slow ); 4713 %} 4714 4715 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4716 predicate(UseSSE > 2 && UseAVX == 0); 4717 match(Set dst (AddReductionVI src1 src2)); 4718 effect(TEMP tmp, TEMP tmp2); 4719 format %{ "movdqu $tmp,$src2\n\t" 4720 "phaddd $tmp,$tmp\n\t" 4721 "phaddd $tmp,$tmp\n\t" 4722 "movd $tmp2,$src1\n\t" 4723 "paddd $tmp2,$tmp\n\t" 4724 "movd $dst,$tmp2\t! add reduction4I" %} 4725 ins_encode %{ 4726 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4727 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4728 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4729 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4730 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4731 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4732 %} 4733 ins_pipe( pipe_slow ); 4734 %} 4735 4736 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4737 predicate(VM_Version::supports_avxonly()); 4738 match(Set dst (AddReductionVI src1 src2)); 4739 effect(TEMP tmp, TEMP tmp2); 4740 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4741 "vphaddd $tmp,$tmp,$tmp\n\t" 4742 "movd $tmp2,$src1\n\t" 4743 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4744 "movd $dst,$tmp2\t! 
add reduction4I" %} 4745 ins_encode %{ 4746 int vector_len = 0; 4747 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4748 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4749 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4750 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4751 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4752 %} 4753 ins_pipe( pipe_slow ); 4754 %} 4755 4756 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4757 predicate(UseAVX > 2); 4758 match(Set dst (AddReductionVI src1 src2)); 4759 effect(TEMP tmp, TEMP tmp2); 4760 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4761 "vpaddd $tmp,$src2,$tmp2\n\t" 4762 "pshufd $tmp2,$tmp,0x1\n\t" 4763 "vpaddd $tmp,$tmp,$tmp2\n\t" 4764 "movd $tmp2,$src1\n\t" 4765 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4766 "movd $dst,$tmp2\t! add reduction4I" %} 4767 ins_encode %{ 4768 int vector_len = 0; 4769 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4770 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4771 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4772 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4773 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4774 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4775 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4776 %} 4777 ins_pipe( pipe_slow ); 4778 %} 4779 4780 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4781 predicate(VM_Version::supports_avxonly()); 4782 match(Set dst (AddReductionVI src1 src2)); 4783 effect(TEMP tmp, TEMP tmp2); 4784 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4785 "vphaddd $tmp,$tmp,$tmp2\n\t" 4786 "vextracti128 $tmp2,$tmp\n\t" 4787 "vpaddd $tmp,$tmp,$tmp2\n\t" 4788 "movd $tmp2,$src1\n\t" 4789 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4790 "movd $dst,$tmp2\t! 
add reduction8I" %} 4791 ins_encode %{ 4792 int vector_len = 1; 4793 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4794 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4795 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4796 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4797 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4798 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4799 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4800 %} 4801 ins_pipe( pipe_slow ); 4816 "movd $dst,$tmp2\t! add reduction8I" %} 4817 ins_encode %{ 4818 int vector_len = 0; 4819 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4820 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4821 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4822 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4823 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4824 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4825 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4826 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4827 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4828 %} 4829 ins_pipe( pipe_slow ); 4830 %} 4831 4832 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4833 predicate(UseAVX > 2); 4834 match(Set dst (AddReductionVI src1 src2)); 4835 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4836 format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" 4837 "vpaddd $tmp3,$tmp3,$src2\n\t" 4838 "vextracti128 $tmp,$tmp3\n\t" 4839 "vpaddd $tmp,$tmp,$tmp3\n\t" 4840 "pshufd $tmp2,$tmp,0xE\n\t" 4841 "vpaddd $tmp,$tmp,$tmp2\n\t" 4842 "pshufd $tmp2,$tmp,0x1\n\t" 4843 "vpaddd $tmp,$tmp,$tmp2\n\t" 4844 "movd $tmp2,$src1\n\t" 4845 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4846 "movd $dst,$tmp2\t! 
add reduction16I" %} 4847 ins_encode %{ 4848 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4849 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4850 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4851 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4852 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4853 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4854 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4855 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4856 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4857 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4858 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4859 %} 4860 ins_pipe( pipe_slow ); 4861 %} 4862 4863 #ifdef _LP64 4864 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4865 predicate(UseAVX > 2); 4866 match(Set dst (AddReductionVL src1 src2)); 4867 effect(TEMP tmp, TEMP tmp2); 4868 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4869 "vpaddq $tmp,$src2,$tmp2\n\t" 4870 "movdq $tmp2,$src1\n\t" 4871 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4872 "movdq $dst,$tmp2\t! 
add reduction2L" %} 4873 ins_encode %{ 4874 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4875 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4876 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4877 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4878 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4879 %} 4880 ins_pipe( pipe_slow ); 4881 %} 4882 4883 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4884 predicate(UseAVX > 2); 4885 match(Set dst (AddReductionVL src1 src2)); 4886 effect(TEMP tmp, TEMP tmp2); 4887 format %{ "vextracti128 $tmp,$src2\n\t" 4888 "vpaddq $tmp2,$tmp,$src2\n\t" 4889 "pshufd $tmp,$tmp2,0xE\n\t" 4890 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4891 "movdq $tmp,$src1\n\t" 4892 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4893 "movdq $dst,$tmp2\t! add reduction4L" %} 4894 ins_encode %{ 4895 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4896 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4897 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4898 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4899 __ movdq($tmp$$XMMRegister, $src1$$Register); 4900 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4901 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4902 %} 4903 ins_pipe( pipe_slow ); 4904 %} 4905 4906 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4907 predicate(UseAVX > 2); 4908 match(Set dst (AddReductionVL src1 src2)); 4909 effect(TEMP tmp, TEMP tmp2); 4910 format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" 4911 "vpaddq $tmp2,$tmp2,$src2\n\t" 4912 "vextracti128 $tmp,$tmp2\n\t" 4913 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4914 "pshufd $tmp,$tmp2,0xE\n\t" 4915 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4916 "movdq $tmp,$src1\n\t" 4917 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4918 "movdq $dst,$tmp2\t! 
add reduction8L" %} 4919 ins_encode %{ 4920 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4921 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4922 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 4923 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4924 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4925 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4926 __ movdq($tmp$$XMMRegister, $src1$$Register); 4927 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4928 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4929 %} 4930 ins_pipe( pipe_slow ); 4931 %} 4932 #endif 4933 4934 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4935 predicate(UseSSE >= 1 && UseAVX == 0); 4936 match(Set dst (AddReductionVF dst src2)); 4937 effect(TEMP dst, TEMP tmp); 4938 format %{ "addss $dst,$src2\n\t" 4939 "pshufd $tmp,$src2,0x01\n\t" 4940 "addss $dst,$tmp\t! add reduction2F" %} 4941 ins_encode %{ 4942 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4943 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4944 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4945 %} 4946 ins_pipe( pipe_slow ); 4947 %} 4948 4949 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4950 predicate(UseAVX > 0); 4951 match(Set dst (AddReductionVF dst src2)); 4952 effect(TEMP dst, TEMP tmp); 4953 format %{ "vaddss $dst,$dst,$src2\n\t" 4954 "pshufd $tmp,$src2,0x01\n\t" 4955 "vaddss $dst,$dst,$tmp\t! 
add reduction2F" %} 4956 ins_encode %{ 4957 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4958 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4959 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4960 %} 4961 ins_pipe( pipe_slow ); 4962 %} 4963 4964 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4965 predicate(UseSSE >= 1 && UseAVX == 0); 4966 match(Set dst (AddReductionVF dst src2)); 4967 effect(TEMP dst, TEMP tmp); 4968 format %{ "addss $dst,$src2\n\t" 4969 "pshufd $tmp,$src2,0x01\n\t" 4970 "addss $dst,$tmp\n\t" 4971 "pshufd $tmp,$src2,0x02\n\t" 4972 "addss $dst,$tmp\n\t" 4973 "pshufd $tmp,$src2,0x03\n\t" 4974 "addss $dst,$tmp\t! add reduction4F" %} 4975 ins_encode %{ 4976 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4977 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4978 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4979 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4980 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4981 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4982 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4983 %} 4984 ins_pipe( pipe_slow ); 4985 %} 4986 4987 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4988 predicate(UseAVX > 0); 4989 match(Set dst (AddReductionVF dst src2)); 4990 effect(TEMP tmp, TEMP dst); 4991 format %{ "vaddss $dst,$dst,$src2\n\t" 4992 "pshufd $tmp,$src2,0x01\n\t" 4993 "vaddss $dst,$dst,$tmp\n\t" 4994 "pshufd $tmp,$src2,0x02\n\t" 4995 "vaddss $dst,$dst,$tmp\n\t" 4996 "pshufd $tmp,$src2,0x03\n\t" 4997 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 4998 ins_encode %{ 4999 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5000 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5001 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5002 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5003 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5004 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5005 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5006 %} 5007 ins_pipe( pipe_slow ); 5008 %} 5009 5010 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5011 predicate(UseAVX > 0); 5012 match(Set dst (AddReductionVF dst src2)); 5013 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5014 format %{ "vaddss $dst,$dst,$src2\n\t" 5015 "pshufd $tmp,$src2,0x01\n\t" 5016 "vaddss $dst,$dst,$tmp\n\t" 5017 "pshufd $tmp,$src2,0x02\n\t" 5018 "vaddss $dst,$dst,$tmp\n\t" 5019 "pshufd $tmp,$src2,0x03\n\t" 5020 "vaddss $dst,$dst,$tmp\n\t" 5021 "vextractf128 $tmp2,$src2\n\t" 5022 "vaddss $dst,$dst,$tmp2\n\t" 5023 "pshufd $tmp,$tmp2,0x01\n\t" 5024 "vaddss $dst,$dst,$tmp\n\t" 5025 "pshufd $tmp,$tmp2,0x02\n\t" 5026 "vaddss $dst,$dst,$tmp\n\t" 5027 "pshufd $tmp,$tmp2,0x03\n\t" 5028 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5029 ins_encode %{ 5030 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5031 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5032 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5033 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5034 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5035 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5036 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5037 __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); 5038 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5039 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5040 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5041 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5042 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5043 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5044 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5045 %} 5046 ins_pipe( pipe_slow ); 5047 %} 5048 5049 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5050 predicate(UseAVX > 2); 5051 match(Set dst (AddReductionVF dst src2)); 5052 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5053 format %{ "vaddss $dst,$dst,$src2\n\t" 5054 "pshufd $tmp,$src2,0x01\n\t" 5055 "vaddss $dst,$dst,$tmp\n\t" 5056 "pshufd $tmp,$src2,0x02\n\t" 5057 "vaddss $dst,$dst,$tmp\n\t" 5058 "pshufd $tmp,$src2,0x03\n\t" 5059 "vaddss $dst,$dst,$tmp\n\t" 5060 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5061 "vaddss $dst,$dst,$tmp2\n\t" 5062 "pshufd $tmp,$tmp2,0x01\n\t" 5063 "vaddss $dst,$dst,$tmp\n\t" 5064 "pshufd $tmp,$tmp2,0x02\n\t" 5065 "vaddss $dst,$dst,$tmp\n\t" 5066 "pshufd $tmp,$tmp2,0x03\n\t" 5067 "vaddss $dst,$dst,$tmp\n\t" 5068 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5069 "vaddss $dst,$dst,$tmp2\n\t" 5070 "pshufd $tmp,$tmp2,0x01\n\t" 5071 "vaddss $dst,$dst,$tmp\n\t" 5072 "pshufd 
$tmp,$tmp2,0x02\n\t" 5073 "vaddss $dst,$dst,$tmp\n\t" 5074 "pshufd $tmp,$tmp2,0x03\n\t" 5075 "vaddss $dst,$dst,$tmp\n\t" 5076 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5077 "vaddss $dst,$dst,$tmp2\n\t" 5078 "pshufd $tmp,$tmp2,0x01\n\t" 5079 "vaddss $dst,$dst,$tmp\n\t" 5080 "pshufd $tmp,$tmp2,0x02\n\t" 5081 "vaddss $dst,$dst,$tmp\n\t" 5082 "pshufd $tmp,$tmp2,0x03\n\t" 5083 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 5084 ins_encode %{ 5085 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5086 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5087 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5088 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5089 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5090 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5091 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5092 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5093 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5094 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5095 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5096 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5097 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5098 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5099 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5100 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5101 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5102 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5103 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5104 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5105 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5106 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5107 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, 
$tmp$$XMMRegister); 5108 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5109 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5110 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5111 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5112 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5113 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5114 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5115 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5116 %} 5117 ins_pipe( pipe_slow ); 5118 %} 5119 5120 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5121 predicate(UseSSE >= 1 && UseAVX == 0); 5122 match(Set dst (AddReductionVD dst src2)); 5123 effect(TEMP tmp, TEMP dst); 5124 format %{ "addsd $dst,$src2\n\t" 5125 "pshufd $tmp,$src2,0xE\n\t" 5126 "addsd $dst,$tmp\t! add reduction2D" %} 5127 ins_encode %{ 5128 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5129 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5130 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5131 %} 5132 ins_pipe( pipe_slow ); 5133 %} 5134 5135 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5136 predicate(UseAVX > 0); 5137 match(Set dst (AddReductionVD dst src2)); 5138 effect(TEMP tmp, TEMP dst); 5139 format %{ "vaddsd $dst,$dst,$src2\n\t" 5140 "pshufd $tmp,$src2,0xE\n\t" 5141 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5142 ins_encode %{ 5143 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5144 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5145 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5146 %} 5147 ins_pipe( pipe_slow ); 5148 %} 5149 5150 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5151 predicate(UseAVX > 0); 5152 match(Set dst (AddReductionVD dst src2)); 5153 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5154 format %{ "vaddsd $dst,$dst,$src2\n\t" 5155 "pshufd $tmp,$src2,0xE\n\t" 5156 "vaddsd $dst,$dst,$tmp\n\t" 5157 "vextractf32x4h $tmp2,$src2, 0x1\n\t" 5158 "vaddsd $dst,$dst,$tmp2\n\t" 5159 "pshufd $tmp,$tmp2,0xE\n\t" 5160 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5161 ins_encode %{ 5162 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5163 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5164 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5165 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5166 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5167 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5168 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5169 %} 5170 ins_pipe( pipe_slow ); 5171 %} 5172 5173 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5174 predicate(UseAVX > 2); 5175 match(Set dst (AddReductionVD dst src2)); 5176 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5177 format %{ "vaddsd $dst,$dst,$src2\n\t" 5178 "pshufd $tmp,$src2,0xE\n\t" 5179 "vaddsd $dst,$dst,$tmp\n\t" 5180 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5181 "vaddsd $dst,$dst,$tmp2\n\t" 5182 "pshufd $tmp,$tmp2,0xE\n\t" 5183 "vaddsd $dst,$dst,$tmp\n\t" 5184 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5185 "vaddsd $dst,$dst,$tmp2\n\t" 5186 "pshufd $tmp,$tmp2,0xE\n\t" 5187 "vaddsd $dst,$dst,$tmp\n\t" 5188 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5189 "vaddsd $dst,$dst,$tmp2\n\t" 5190 "pshufd 
$tmp,$tmp2,0xE\n\t" 5191 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5192 ins_encode %{ 5193 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5194 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5195 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5196 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5197 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5198 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5199 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5200 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5201 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5202 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5203 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5204 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5205 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5206 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5207 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5208 %} 5209 ins_pipe( pipe_slow ); 5210 %} 5211 5212 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5213 predicate(UseSSE > 3 && UseAVX == 0); 5214 match(Set dst (MulReductionVI src1 src2)); 5215 effect(TEMP tmp, TEMP tmp2); 5216 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5217 "pmulld $tmp2,$src2\n\t" 5218 "movd $tmp,$src1\n\t" 5219 "pmulld $tmp2,$tmp\n\t" 5220 "movd $dst,$tmp2\t! mul reduction2I" %} 5221 ins_encode %{ 5222 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5223 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5224 __ movdl($tmp$$XMMRegister, $src1$$Register); 5225 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5226 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5227 %} 5310 "movd $dst,$tmp2\t! 
mul reduction8I" %} 5311 ins_encode %{ 5312 int vector_len = 0; 5313 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5314 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5315 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5316 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5317 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5318 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5319 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5320 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5321 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5322 %} 5323 ins_pipe( pipe_slow ); 5324 %} 5325 5326 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5327 predicate(UseAVX > 2); 5328 match(Set dst (MulReductionVI src1 src2)); 5329 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5330 format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" 5331 "vpmulld $tmp3,$tmp3,$src2\n\t" 5332 "vextracti128 $tmp,$tmp3\n\t" 5333 "vpmulld $tmp,$tmp,$tmp3\n\t" 5334 "pshufd $tmp2,$tmp,0xE\n\t" 5335 "vpmulld $tmp,$tmp,$tmp2\n\t" 5336 "pshufd $tmp2,$tmp,0x1\n\t" 5337 "vpmulld $tmp,$tmp,$tmp2\n\t" 5338 "movd $tmp2,$src1\n\t" 5339 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5340 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5341 ins_encode %{ 5342 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5343 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5344 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 5345 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5346 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5347 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5348 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5349 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5350 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5351 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5352 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5353 %} 5354 ins_pipe( pipe_slow ); 5355 %} 5356 5357 #ifdef _LP64 5358 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5359 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5360 match(Set dst (MulReductionVL src1 src2)); 5361 effect(TEMP tmp, TEMP tmp2); 5362 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5363 "vpmullq $tmp,$src2,$tmp2\n\t" 5364 "movdq $tmp2,$src1\n\t" 5365 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5366 "movdq $dst,$tmp2\t! 
mul reduction2L" %} 5367 ins_encode %{ 5368 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5369 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5370 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5371 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5372 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5373 %} 5374 ins_pipe( pipe_slow ); 5375 %} 5376 5377 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5378 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5379 match(Set dst (MulReductionVL src1 src2)); 5380 effect(TEMP tmp, TEMP tmp2); 5381 format %{ "vextracti128 $tmp,$src2\n\t" 5382 "vpmullq $tmp2,$tmp,$src2\n\t" 5383 "pshufd $tmp,$tmp2,0xE\n\t" 5384 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5385 "movdq $tmp,$src1\n\t" 5386 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5387 "movdq $dst,$tmp2\t! mul reduction4L" %} 5388 ins_encode %{ 5389 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5390 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5391 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5392 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5393 __ movdq($tmp$$XMMRegister, $src1$$Register); 5394 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5395 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5396 %} 5397 ins_pipe( pipe_slow ); 5398 %} 5399 5400 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5401 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5402 match(Set dst (MulReductionVL src1 src2)); 5403 effect(TEMP tmp, TEMP tmp2); 5404 format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" 5405 "vpmullq $tmp2,$tmp2,$src2\n\t" 5406 "vextracti128 $tmp,$tmp2\n\t" 5407 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5408 "pshufd $tmp,$tmp2,0xE\n\t" 5409 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5410 "movdq $tmp,$src1\n\t" 5411 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5412 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5413 ins_encode %{ 5414 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5415 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5416 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 5417 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5418 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5419 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5420 __ movdq($tmp$$XMMRegister, $src1$$Register); 5421 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5422 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5423 %} 5424 ins_pipe( pipe_slow ); 5425 %} 5426 #endif 5427 5428 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5429 predicate(UseSSE >= 1 && UseAVX == 0); 5430 match(Set dst (MulReductionVF dst src2)); 5431 effect(TEMP dst, TEMP tmp); 5432 format %{ "mulss $dst,$src2\n\t" 5433 "pshufd $tmp,$src2,0x01\n\t" 5434 "mulss $dst,$tmp\t! mul reduction2F" %} 5435 ins_encode %{ 5436 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5437 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5438 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5439 %} 5440 ins_pipe( pipe_slow ); 5441 %} 5442 5443 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5444 predicate(UseAVX > 0); 5445 match(Set dst (MulReductionVF dst src2)); 5446 effect(TEMP tmp, TEMP dst); 5447 format %{ "vmulss $dst,$dst,$src2\n\t" 5448 "pshufd $tmp,$src2,0x01\n\t" 5449 "vmulss $dst,$dst,$tmp\t! 
mul reduction2F" %} 5450 ins_encode %{ 5451 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5452 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5453 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5454 %} 5455 ins_pipe( pipe_slow ); 5456 %} 5457 5458 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5459 predicate(UseSSE >= 1 && UseAVX == 0); 5460 match(Set dst (MulReductionVF dst src2)); 5461 effect(TEMP dst, TEMP tmp); 5462 format %{ "mulss $dst,$src2\n\t" 5463 "pshufd $tmp,$src2,0x01\n\t" 5464 "mulss $dst,$tmp\n\t" 5465 "pshufd $tmp,$src2,0x02\n\t" 5466 "mulss $dst,$tmp\n\t" 5467 "pshufd $tmp,$src2,0x03\n\t" 5468 "mulss $dst,$tmp\t! mul reduction4F" %} 5469 ins_encode %{ 5470 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5471 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5472 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5473 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5474 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5475 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5476 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5477 %} 5478 ins_pipe( pipe_slow ); 5479 %} 5480 5481 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5482 predicate(UseAVX > 0); 5483 match(Set dst (MulReductionVF dst src2)); 5484 effect(TEMP tmp, TEMP dst); 5485 format %{ "vmulss $dst,$dst,$src2\n\t" 5486 "pshufd $tmp,$src2,0x01\n\t" 5487 "vmulss $dst,$dst,$tmp\n\t" 5488 "pshufd $tmp,$src2,0x02\n\t" 5489 "vmulss $dst,$dst,$tmp\n\t" 5490 "pshufd $tmp,$src2,0x03\n\t" 5491 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5492 ins_encode %{ 5493 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5494 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5495 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5496 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5497 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5498 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5499 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5500 %} 5501 ins_pipe( pipe_slow ); 5502 %} 5503 5504 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5505 predicate(UseAVX > 0); 5506 match(Set dst (MulReductionVF dst src2)); 5507 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5508 format %{ "vmulss $dst,$dst,$src2\n\t" 5509 "pshufd $tmp,$src2,0x01\n\t" 5510 "vmulss $dst,$dst,$tmp\n\t" 5511 "pshufd $tmp,$src2,0x02\n\t" 5512 "vmulss $dst,$dst,$tmp\n\t" 5513 "pshufd $tmp,$src2,0x03\n\t" 5514 "vmulss $dst,$dst,$tmp\n\t" 5515 "vextractf128 $tmp2,$src2\n\t" 5516 "vmulss $dst,$dst,$tmp2\n\t" 5517 "pshufd $tmp,$tmp2,0x01\n\t" 5518 "vmulss $dst,$dst,$tmp\n\t" 5519 "pshufd $tmp,$tmp2,0x02\n\t" 5520 "vmulss $dst,$dst,$tmp\n\t" 5521 "pshufd $tmp,$tmp2,0x03\n\t" 5522 "vmulss $dst,$dst,$tmp\t! 
mul reduction8F" %} 5523 ins_encode %{ 5524 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5525 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5526 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5527 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5528 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5529 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5530 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5531 __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); 5532 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5533 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5534 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5535 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5536 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5537 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5538 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5539 %} 5540 ins_pipe( pipe_slow ); 5541 %} 5542 5543 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5544 predicate(UseAVX > 2); 5545 match(Set dst (MulReductionVF dst src2)); 5546 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5547 format %{ "vmulss $dst,$dst,$src2\n\t" 5548 "pshufd $tmp,$src2,0x01\n\t" 5549 "vmulss $dst,$dst,$tmp\n\t" 5550 "pshufd $tmp,$src2,0x02\n\t" 5551 "vmulss $dst,$dst,$tmp\n\t" 5552 "pshufd $tmp,$src2,0x03\n\t" 5553 "vmulss $dst,$dst,$tmp\n\t" 5554 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5555 "vmulss $dst,$dst,$tmp2\n\t" 5556 "pshufd $tmp,$tmp2,0x01\n\t" 5557 "vmulss $dst,$dst,$tmp\n\t" 5558 "pshufd $tmp,$tmp2,0x02\n\t" 5559 "vmulss $dst,$dst,$tmp\n\t" 5560 "pshufd $tmp,$tmp2,0x03\n\t" 5561 "vmulss $dst,$dst,$tmp\n\t" 5562 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5563 "vmulss $dst,$dst,$tmp2\n\t" 5564 "pshufd $tmp,$tmp2,0x01\n\t" 5565 "vmulss $dst,$dst,$tmp\n\t" 5566 "pshufd 
$tmp,$tmp2,0x02\n\t" 5567 "vmulss $dst,$dst,$tmp\n\t" 5568 "pshufd $tmp,$tmp2,0x03\n\t" 5569 "vmulss $dst,$dst,$tmp\n\t" 5570 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5571 "vmulss $dst,$dst,$tmp2\n\t" 5572 "pshufd $tmp,$tmp2,0x01\n\t" 5573 "vmulss $dst,$dst,$tmp\n\t" 5574 "pshufd $tmp,$tmp2,0x02\n\t" 5575 "vmulss $dst,$dst,$tmp\n\t" 5576 "pshufd $tmp,$tmp2,0x03\n\t" 5577 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5578 ins_encode %{ 5579 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5580 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5581 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5582 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5583 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5584 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5585 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5586 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5587 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5588 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5589 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5590 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5591 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5592 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5593 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5594 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5595 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5596 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5597 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5598 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5599 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5600 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5601 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, 
$tmp$$XMMRegister); 5602 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5603 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5604 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5605 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5606 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5607 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5608 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5609 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5610 %} 5611 ins_pipe( pipe_slow ); 5612 %} 5613 5614 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5615 predicate(UseSSE >= 1 && UseAVX == 0); 5616 match(Set dst (MulReductionVD dst src2)); 5617 effect(TEMP dst, TEMP tmp); 5618 format %{ "mulsd $dst,$src2\n\t" 5619 "pshufd $tmp,$src2,0xE\n\t" 5620 "mulsd $dst,$tmp\t! mul reduction2D" %} 5621 ins_encode %{ 5622 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5623 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5624 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5625 %} 5626 ins_pipe( pipe_slow ); 5627 %} 5628 5629 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5630 predicate(UseAVX > 0); 5631 match(Set dst (MulReductionVD dst src2)); 5632 effect(TEMP tmp, TEMP dst); 5633 format %{ "vmulsd $dst,$dst,$src2\n\t" 5634 "pshufd $tmp,$src2,0xE\n\t" 5635 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %} 5636 ins_encode %{ 5637 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5638 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5639 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5640 %} 5641 ins_pipe( pipe_slow ); 5642 %} 5643 5644 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5645 predicate(UseAVX > 0); 5646 match(Set dst (MulReductionVD dst src2)); 5647 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5648 format %{ "vmulsd $dst,$dst,$src2\n\t" 5649 "pshufd $tmp,$src2,0xE\n\t" 5650 "vmulsd $dst,$dst,$tmp\n\t" 5651 "vextractf128 $tmp2,$src2\n\t" 5652 "vmulsd $dst,$dst,$tmp2\n\t" 5653 "pshufd $tmp,$tmp2,0xE\n\t" 5654 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5655 ins_encode %{ 5656 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5657 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5658 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5659 __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); 5660 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5661 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5662 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5663 %} 5664 ins_pipe( pipe_slow ); 5665 %} 5666 5667 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5668 predicate(UseAVX > 2); 5669 match(Set dst (MulReductionVD dst src2)); 5670 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5671 format %{ "vmulsd $dst,$dst,$src2\n\t" 5672 "pshufd $tmp,$src2,0xE\n\t" 5673 "vmulsd $dst,$dst,$tmp\n\t" 5674 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5675 "vmulsd $dst,$dst,$tmp2\n\t" 5676 "pshufd $tmp,$src2,0xE\n\t" 5677 "vmulsd $dst,$dst,$tmp\n\t" 5678 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5679 "vmulsd $dst,$dst,$tmp2\n\t" 5680 "pshufd $tmp,$tmp2,0xE\n\t" 5681 "vmulsd $dst,$dst,$tmp\n\t" 5682 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5683 "vmulsd $dst,$dst,$tmp2\n\t" 5684 "pshufd 
$tmp,$tmp2,0xE\n\t" 5685 "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} 5686 ins_encode %{ 5687 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5688 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5689 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5690 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5691 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5692 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5693 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5694 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5695 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5696 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5697 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5698 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5699 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5700 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5701 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5702 %} 5703 ins_pipe( pipe_slow ); 5704 %} 5705 5706 // ====================VECTOR ARITHMETIC======================================= 5707 5708 // --------------------------------- ADD -------------------------------------- 5709 5710 // Bytes vector add 5711 instruct vadd4B(vecS dst, vecS src) %{ 5712 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5713 match(Set dst (AddVB dst src)); 5714 format %{ "paddb $dst,$src\t! add packed4B" %} 5715 ins_encode %{ 5716 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5717 %} 5718 ins_pipe( pipe_slow ); 5719 %} 5720 5721 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5722 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5723 match(Set dst (AddVB src1 src2)); 5724 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed4B" %} 5725 ins_encode %{ 5726 int vector_len = 0; 5727 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5728 %} 5729 ins_pipe( pipe_slow ); 5730 %} 5731 5732 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5733 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5734 match(Set dst (AddVB src1 src2)); 5735 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5736 ins_encode %{ 5737 int vector_len = 0; 5738 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5739 %} 5740 ins_pipe( pipe_slow ); 5741 %} 5742 5743 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5744 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5745 match(Set dst (AddVB dst src2)); 5746 effect(TEMP src1); 5747 format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} 5748 ins_encode %{ 5749 int vector_len = 0; 5750 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5751 %} 5752 ins_pipe( pipe_slow ); 5753 %} 5754 5755 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ 5756 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5757 match(Set dst (AddVB src (LoadVector mem))); 5758 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5759 ins_encode %{ 5760 int vector_len = 0; 5761 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5762 %} 5763 ins_pipe( pipe_slow ); 5764 %} 5765 5766 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ 5767 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5768 match(Set dst (AddVB src (LoadVector mem))); 5769 format %{ "vpaddb $dst,$src,$mem\t! 
add packed4B" %} 5770 ins_encode %{ 5771 int vector_len = 0; 5772 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5773 %} 5774 ins_pipe( pipe_slow ); 5775 %} 5776 5777 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5778 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5779 match(Set dst (AddVB dst (LoadVector mem))); 5780 effect(TEMP src); 5781 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5782 ins_encode %{ 5783 int vector_len = 0; 5784 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5785 %} 5786 ins_pipe( pipe_slow ); 5787 %} 5788 5789 instruct vadd8B(vecD dst, vecD src) %{ 5790 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 5791 match(Set dst (AddVB dst src)); 5792 format %{ "paddb $dst,$src\t! add packed8B" %} 5793 ins_encode %{ 5794 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5795 %} 5796 ins_pipe( pipe_slow ); 5797 %} 5798 5799 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5800 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5801 match(Set dst (AddVB src1 src2)); 5802 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5803 ins_encode %{ 5804 int vector_len = 0; 5805 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5806 %} 5807 ins_pipe( pipe_slow ); 5808 %} 5809 5810 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5811 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5812 match(Set dst (AddVB src1 src2)); 5813 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %} 5814 ins_encode %{ 5815 int vector_len = 0; 5816 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5817 %} 5818 ins_pipe( pipe_slow ); 5819 %} 5820 5821 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5822 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5823 match(Set dst (AddVB dst src2)); 5824 effect(TEMP src1); 5825 format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} 5826 ins_encode %{ 5827 int vector_len = 0; 5828 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5829 %} 5830 ins_pipe( pipe_slow ); 5831 %} 5832 5833 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ 5834 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5835 match(Set dst (AddVB src (LoadVector mem))); 5836 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5837 ins_encode %{ 5838 int vector_len = 0; 5839 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5840 %} 5841 ins_pipe( pipe_slow ); 5842 %} 5843 5844 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ 5845 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5846 match(Set dst (AddVB src (LoadVector mem))); 5847 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5848 ins_encode %{ 5849 int vector_len = 0; 5850 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5851 %} 5852 ins_pipe( pipe_slow ); 5853 %} 5854 5855 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 5856 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5857 match(Set dst (AddVB dst (LoadVector mem))); 5858 effect(TEMP src); 5859 format %{ "vpaddb $dst,$src,$mem\t! 
add packed8B" %} 5860 ins_encode %{ 5861 int vector_len = 0; 5862 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5863 %} 5864 ins_pipe( pipe_slow ); 5865 %} 5866 5867 instruct vadd16B(vecX dst, vecX src) %{ 5868 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 5869 match(Set dst (AddVB dst src)); 5870 format %{ "paddb $dst,$src\t! add packed16B" %} 5871 ins_encode %{ 5872 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5873 %} 5874 ins_pipe( pipe_slow ); 5875 %} 5876 5877 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 5878 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5879 match(Set dst (AddVB src1 src2)); 5880 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5881 ins_encode %{ 5882 int vector_len = 0; 5883 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5884 %} 5885 ins_pipe( pipe_slow ); 5886 %} 5887 5888 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 5889 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5890 match(Set dst (AddVB src1 src2)); 5891 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5892 ins_encode %{ 5893 int vector_len = 0; 5894 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5895 %} 5896 ins_pipe( pipe_slow ); 5897 %} 5898 5899 instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 5900 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 5901 match(Set dst (AddVB dst src2)); 5902 effect(TEMP src1); 5903 format %{ "vpaddb $dst,$dst,$src2\t! 
add packed16B" %} 5904 ins_encode %{ 5905 int vector_len = 0; 5906 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5907 %} 5908 ins_pipe( pipe_slow ); 5909 %} 5910 5911 instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ 5912 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5913 match(Set dst (AddVB src (LoadVector mem))); 5914 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5915 ins_encode %{ 5916 int vector_len = 0; 5917 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5918 %} 5919 ins_pipe( pipe_slow ); 5920 %} 5921 5922 instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ 5923 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5924 match(Set dst (AddVB src (LoadVector mem))); 5925 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5926 ins_encode %{ 5927 int vector_len = 0; 5928 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5929 %} 5930 ins_pipe( pipe_slow ); 5931 %} 5932 5933 instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 5934 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5935 match(Set dst (AddVB dst (LoadVector mem))); 5936 effect(TEMP src); 5937 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5938 ins_encode %{ 5939 int vector_len = 0; 5940 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5941 %} 5942 ins_pipe( pipe_slow ); 5943 %} 5944 5945 instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 5946 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5947 match(Set dst (AddVB src1 src2)); 5948 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed32B" %} 5949 ins_encode %{ 5950 int vector_len = 1; 5951 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5952 %} 5953 ins_pipe( pipe_slow ); 5954 %} 5955 5956 instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 5957 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5958 match(Set dst (AddVB src1 src2)); 5959 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5960 ins_encode %{ 5961 int vector_len = 1; 5962 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 5968 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 5969 match(Set dst (AddVB dst src2)); 5970 effect(TEMP src1); 5971 format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %} 5972 ins_encode %{ 5973 int vector_len = 1; 5974 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5975 %} 5976 ins_pipe( pipe_slow ); 5977 %} 5978 5979 instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{ 5980 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5981 match(Set dst (AddVB src (LoadVector mem))); 5982 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5983 ins_encode %{ 5984 int vector_len = 1; 5985 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5986 %} 5987 ins_pipe( pipe_slow ); 5988 %} 5989 5990 instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{ 5991 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5992 match(Set dst (AddVB src (LoadVector mem))); 5993 format %{ "vpaddb $dst,$src,$mem\t! 
add packed32B" %} 5994 ins_encode %{ 5995 int vector_len = 1; 5996 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5997 %} 5998 ins_pipe( pipe_slow ); 5999 %} 6000 6001 instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 6002 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6003 match(Set dst (AddVB dst (LoadVector mem))); 6004 effect(TEMP src); 6005 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 6006 ins_encode %{ 6007 int vector_len = 1; 6008 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6009 %} 6010 ins_pipe( pipe_slow ); 6011 %} 6012 6013 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6014 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6015 match(Set dst (AddVB src1 src2)); 6016 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 6017 ins_encode %{ 6018 int vector_len = 2; 6019 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6020 %} 6021 ins_pipe( pipe_slow ); 6022 %} 6023 6024 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 6025 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6026 match(Set dst (AddVB src (LoadVector mem))); 6027 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 6028 ins_encode %{ 6029 int vector_len = 2; 6030 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6031 %} 6032 ins_pipe( pipe_slow ); 6033 %} 6034 6035 // Shorts/Chars vector add 6036 instruct vadd2S(vecS dst, vecS src) %{ 6037 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6038 match(Set dst (AddVS dst src)); 6039 format %{ "paddw $dst,$src\t! 
add packed2S" %} 6040 ins_encode %{ 6041 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 6042 %} 6043 ins_pipe( pipe_slow ); 6044 %} 6045 6046 instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 6047 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 6048 match(Set dst (AddVS src1 src2)); 6049 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 6050 ins_encode %{ 6051 int vector_len = 0; 6052 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6053 %} 6054 ins_pipe( pipe_slow ); 6055 %} 6056 6057 instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 6058 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 6059 match(Set dst (AddVS src1 src2)); 6060 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 6061 ins_encode %{ 6062 int vector_len = 0; 6063 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6064 %} 6065 ins_pipe( pipe_slow ); 6066 %} 6067 6068 instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 6069 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 6070 match(Set dst (AddVS dst src2)); 6071 effect(TEMP src1); 6072 format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %} 6073 ins_encode %{ 6074 int vector_len = 0; 6075 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6076 %} 6077 ins_pipe( pipe_slow ); 6078 %} 6079 6080 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ 6081 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 6082 match(Set dst (AddVS src (LoadVector mem))); 6083 format %{ "vpaddw $dst,$src,$mem\t! 
add packed2S" %} 6084 ins_encode %{ 6085 int vector_len = 0; 6086 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6087 %} 6088 ins_pipe( pipe_slow ); 6089 %} 6090 6091 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ 6092 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 6093 match(Set dst (AddVS src (LoadVector mem))); 6094 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 6095 ins_encode %{ 6096 int vector_len = 0; 6097 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6098 %} 6099 ins_pipe( pipe_slow ); 6100 %} 6101 6102 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 6103 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 6104 match(Set dst (AddVS dst (LoadVector mem))); 6105 effect(TEMP src); 6106 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 6107 ins_encode %{ 6108 int vector_len = 0; 6109 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6110 %} 6111 ins_pipe( pipe_slow ); 6112 %} 6113 6114 instruct vadd4S(vecD dst, vecD src) %{ 6115 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6116 match(Set dst (AddVS dst src)); 6117 format %{ "paddw $dst,$src\t! add packed4S" %} 6118 ins_encode %{ 6119 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 6120 %} 6121 ins_pipe( pipe_slow ); 6122 %} 6123 6124 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 6125 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6126 match(Set dst (AddVS src1 src2)); 6127 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed4S" %} 6128 ins_encode %{ 6129 int vector_len = 0; 6130 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6131 %} 6132 ins_pipe( pipe_slow ); 6133 %} 6134 6135 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 6136 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6137 match(Set dst (AddVS src1 src2)); 6138 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 6139 ins_encode %{ 6140 int vector_len = 0; 6141 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6142 %} 6143 ins_pipe( pipe_slow ); 6144 %} 6145 6146 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 6147 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6148 match(Set dst (AddVS dst src2)); 6149 effect(TEMP src1); 6150 format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} 6151 ins_encode %{ 6152 int vector_len = 0; 6153 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6154 %} 6155 ins_pipe( pipe_slow ); 6156 %} 6157 6158 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ 6159 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6160 match(Set dst (AddVS src (LoadVector mem))); 6161 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 6162 ins_encode %{ 6163 int vector_len = 0; 6164 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6165 %} 6166 ins_pipe( pipe_slow ); 6167 %} 6168 6169 instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{ 6170 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6171 match(Set dst (AddVS src (LoadVector mem))); 6172 format %{ "vpaddw $dst,$src,$mem\t! 
add packed4S" %} 6173 ins_encode %{ 6174 int vector_len = 0; 6175 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6176 %} 6177 ins_pipe( pipe_slow ); 6178 %} 6179 6180 instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 6181 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6182 match(Set dst (AddVS dst (LoadVector mem))); 6183 effect(TEMP src); 6184 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 6185 ins_encode %{ 6186 int vector_len = 0; 6187 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6188 %} 6189 ins_pipe( pipe_slow ); 6190 %} 6191 6192 instruct vadd8S(vecX dst, vecX src) %{ 6193 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6194 match(Set dst (AddVS dst src)); 6195 format %{ "paddw $dst,$src\t! add packed8S" %} 6196 ins_encode %{ 6197 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 6198 %} 6199 ins_pipe( pipe_slow ); 6200 %} 6201 6202 instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 6203 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6204 match(Set dst (AddVS src1 src2)); 6205 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 6206 ins_encode %{ 6207 int vector_len = 0; 6208 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6209 %} 6210 ins_pipe( pipe_slow ); 6211 %} 6212 6213 instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 6214 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6215 match(Set dst (AddVS src1 src2)); 6216 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %} 6217 ins_encode %{ 6218 int vector_len = 0; 6219 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6220 %} 6221 ins_pipe( pipe_slow ); 6222 %} 6223 6224 instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 6225 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6226 match(Set dst (AddVS dst src2)); 6227 effect(TEMP src1); 6228 format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %} 6229 ins_encode %{ 6230 int vector_len = 0; 6231 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6232 %} 6233 ins_pipe( pipe_slow ); 6234 %} 6235 6236 instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{ 6237 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6238 match(Set dst (AddVS src (LoadVector mem))); 6239 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 6240 ins_encode %{ 6241 int vector_len = 0; 6242 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6243 %} 6244 ins_pipe( pipe_slow ); 6245 %} 6246 6247 instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{ 6248 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6249 match(Set dst (AddVS src (LoadVector mem))); 6250 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 6251 ins_encode %{ 6252 int vector_len = 0; 6253 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6254 %} 6255 ins_pipe( pipe_slow ); 6256 %} 6257 6258 instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 6259 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6260 match(Set dst (AddVS dst (LoadVector mem))); 6261 effect(TEMP src); 6262 format %{ "vpaddw $dst,$src,$mem\t! 
add packed8S" %} 6263 ins_encode %{ 6264 int vector_len = 0; 6265 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6266 %} 6267 ins_pipe( pipe_slow ); 6268 %} 6269 6270 instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 6271 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 6272 match(Set dst (AddVS src1 src2)); 6273 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 6274 ins_encode %{ 6275 int vector_len = 1; 6276 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6277 %} 6278 ins_pipe( pipe_slow ); 6279 %} 6280 6281 instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 6282 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6283 match(Set dst (AddVS src1 src2)); 6284 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 6285 ins_encode %{ 6286 int vector_len = 1; 6287 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6288 %} 6289 ins_pipe( pipe_slow ); 6290 %} 6291 6292 instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 6293 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6294 match(Set dst (AddVS dst src2)); 6295 effect(TEMP src1); 6296 format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %} 6297 ins_encode %{ 6298 int vector_len = 1; 6299 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6300 %} 6301 ins_pipe( pipe_slow ); 6302 %} 6303 6304 instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{ 6305 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 6306 match(Set dst (AddVS src (LoadVector mem))); 6307 format %{ "vpaddw $dst,$src,$mem\t! 
add packed16S" %} 6308 ins_encode %{ 6309 int vector_len = 1; 6310 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6311 %} 6312 ins_pipe( pipe_slow ); 6313 %} 6314 6315 instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{ 6316 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6317 match(Set dst (AddVS src (LoadVector mem))); 6318 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6319 ins_encode %{ 6320 int vector_len = 1; 6321 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6322 %} 6323 ins_pipe( pipe_slow ); 6324 %} 6325 6326 instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 6327 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6328 match(Set dst (AddVS dst (LoadVector mem))); 6329 effect(TEMP src); 6330 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6331 ins_encode %{ 6332 int vector_len = 1; 6333 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6334 %} 6335 ins_pipe( pipe_slow ); 6336 %} 6337 6338 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6339 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6340 match(Set dst (AddVS src1 src2)); 6341 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} 6342 ins_encode %{ 6343 int vector_len = 2; 6344 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6345 %} 6346 ins_pipe( pipe_slow ); 6347 %} 6348 6349 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 6350 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6351 match(Set dst (AddVS src (LoadVector mem))); 6352 format %{ "vpaddw $dst,$src,$mem\t! 
add packed32S" %} 6353 ins_encode %{ 6354 int vector_len = 2; 6355 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6356 %} 6357 ins_pipe( pipe_slow ); 6358 %} 6359 6360 // Integers vector add 6361 instruct vadd2I(vecD dst, vecD src) %{ 6362 predicate(n->as_Vector()->length() == 2); 6363 match(Set dst (AddVI dst src)); 6364 format %{ "paddd $dst,$src\t! add packed2I" %} 6365 ins_encode %{ 6366 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6367 %} 6368 ins_pipe( pipe_slow ); 6369 %} 6370 6371 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 6372 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6373 match(Set dst (AddVI src1 src2)); 6374 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 6375 ins_encode %{ 6376 int vector_len = 0; 6377 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6378 %} 6379 ins_pipe( pipe_slow ); 6380 %} 6381 6382 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 6383 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6384 match(Set dst (AddVI src (LoadVector mem))); 6385 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 6386 ins_encode %{ 6387 int vector_len = 0; 6388 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6389 %} 6390 ins_pipe( pipe_slow ); 6391 %} 6392 6393 instruct vadd4I(vecX dst, vecX src) %{ 6394 predicate(n->as_Vector()->length() == 4); 6395 match(Set dst (AddVI dst src)); 6396 format %{ "paddd $dst,$src\t! add packed4I" %} 6397 ins_encode %{ 6398 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6399 %} 6400 ins_pipe( pipe_slow ); 6401 %} 6402 6403 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 6404 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6405 match(Set dst (AddVI src1 src2)); 6406 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed4I" %} 6407 ins_encode %{ 6408 int vector_len = 0; 6409 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6410 %} 6411 ins_pipe( pipe_slow ); 6412 %} 6413 6414 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 6415 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6416 match(Set dst (AddVI src (LoadVector mem))); 6417 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 6418 ins_encode %{ 6419 int vector_len = 0; 6420 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6421 %} 6422 ins_pipe( pipe_slow ); 6423 %} 6424 6425 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 6426 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6427 match(Set dst (AddVI src1 src2)); 6428 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 6429 ins_encode %{ 6430 int vector_len = 1; 6431 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6432 %} 6433 ins_pipe( pipe_slow ); 6434 %} 6435 6436 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 6437 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6438 match(Set dst (AddVI src (LoadVector mem))); 6439 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 6440 ins_encode %{ 6441 int vector_len = 1; 6442 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6443 %} 6444 ins_pipe( pipe_slow ); 6445 %} 6446 6447 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6448 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6449 match(Set dst (AddVI src1 src2)); 6450 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed16I" %} 6451 ins_encode %{ 6452 int vector_len = 2; 6453 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6454 %} 6455 ins_pipe( pipe_slow ); 6456 %} 6457 6458 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 6459 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6460 match(Set dst (AddVI src (LoadVector mem))); 6461 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 6462 ins_encode %{ 6463 int vector_len = 2; 6464 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6465 %} 6466 ins_pipe( pipe_slow ); 6467 %} 6468 6469 // Longs vector add 6470 instruct vadd2L(vecX dst, vecX src) %{ 6471 predicate(n->as_Vector()->length() == 2); 6472 match(Set dst (AddVL dst src)); 6473 format %{ "paddq $dst,$src\t! add packed2L" %} 6474 ins_encode %{ 6475 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 6476 %} 6477 ins_pipe( pipe_slow ); 6478 %} 6479 6480 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 6481 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6482 match(Set dst (AddVL src1 src2)); 6483 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 6484 ins_encode %{ 6485 int vector_len = 0; 6486 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6487 %} 6488 ins_pipe( pipe_slow ); 6489 %} 6490 6491 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 6492 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6493 match(Set dst (AddVL src (LoadVector mem))); 6494 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 6495 ins_encode %{ 6496 int vector_len = 0; 6497 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6498 %} 6499 ins_pipe( pipe_slow ); 6500 %} 6501 6502 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 6503 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6504 match(Set dst (AddVL src1 src2)); 6505 format %{ "vpaddq $dst,$src1,$src2\t! 
add packed4L" %} 6506 ins_encode %{ 6507 int vector_len = 1; 6508 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6509 %} 6510 ins_pipe( pipe_slow ); 6511 %} 6512 6513 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 6514 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6515 match(Set dst (AddVL src (LoadVector mem))); 6516 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 6517 ins_encode %{ 6518 int vector_len = 1; 6519 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6520 %} 6521 ins_pipe( pipe_slow ); 6522 %} 6523 6524 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6525 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6526 match(Set dst (AddVL src1 src2)); 6527 format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} 6528 ins_encode %{ 6529 int vector_len = 2; 6530 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6531 %} 6532 ins_pipe( pipe_slow ); 6533 %} 6534 6535 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ 6536 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6537 match(Set dst (AddVL src (LoadVector mem))); 6538 format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} 6539 ins_encode %{ 6540 int vector_len = 2; 6541 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6542 %} 6543 ins_pipe( pipe_slow ); 6544 %} 6545 6546 // Floats vector add 6547 instruct vadd2F(vecD dst, vecD src) %{ 6548 predicate(n->as_Vector()->length() == 2); 6549 match(Set dst (AddVF dst src)); 6550 format %{ "addps $dst,$src\t! add packed2F" %} 6551 ins_encode %{ 6552 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6553 %} 6554 ins_pipe( pipe_slow ); 6555 %} 6556 6557 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 6558 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6559 match(Set dst (AddVF src1 src2)); 6560 format %{ "vaddps $dst,$src1,$src2\t! 
add packed2F" %} 6561 ins_encode %{ 6562 int vector_len = 0; 6563 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6564 %} 6565 ins_pipe( pipe_slow ); 6566 %} 6567 6568 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 6569 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6570 match(Set dst (AddVF src (LoadVector mem))); 6571 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 6572 ins_encode %{ 6573 int vector_len = 0; 6574 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6575 %} 6576 ins_pipe( pipe_slow ); 6577 %} 6578 6579 instruct vadd4F(vecX dst, vecX src) %{ 6580 predicate(n->as_Vector()->length() == 4); 6581 match(Set dst (AddVF dst src)); 6582 format %{ "addps $dst,$src\t! add packed4F" %} 6583 ins_encode %{ 6584 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6585 %} 6586 ins_pipe( pipe_slow ); 6587 %} 6588 6589 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 6590 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6591 match(Set dst (AddVF src1 src2)); 6592 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 6593 ins_encode %{ 6594 int vector_len = 0; 6595 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6596 %} 6597 ins_pipe( pipe_slow ); 6598 %} 6599 6600 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 6601 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6602 match(Set dst (AddVF src (LoadVector mem))); 6603 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 6604 ins_encode %{ 6605 int vector_len = 0; 6606 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6607 %} 6608 ins_pipe( pipe_slow ); 6609 %} 6610 6611 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 6612 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6613 match(Set dst (AddVF src1 src2)); 6614 format %{ "vaddps $dst,$src1,$src2\t! 
add packed8F" %} 6615 ins_encode %{ 6616 int vector_len = 1; 6617 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6618 %} 6619 ins_pipe( pipe_slow ); 6620 %} 6621 6622 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 6623 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6624 match(Set dst (AddVF src (LoadVector mem))); 6625 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 6626 ins_encode %{ 6627 int vector_len = 1; 6628 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6629 %} 6630 ins_pipe( pipe_slow ); 6631 %} 6632 6633 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6634 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6635 match(Set dst (AddVF src1 src2)); 6636 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} 6637 ins_encode %{ 6638 int vector_len = 2; 6639 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6640 %} 6641 ins_pipe( pipe_slow ); 6642 %} 6643 6644 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 6645 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6646 match(Set dst (AddVF src (LoadVector mem))); 6647 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 6648 ins_encode %{ 6649 int vector_len = 2; 6650 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6651 %} 6652 ins_pipe( pipe_slow ); 6653 %} 6654 6655 // Doubles vector add 6656 instruct vadd2D(vecX dst, vecX src) %{ 6657 predicate(n->as_Vector()->length() == 2); 6658 match(Set dst (AddVD dst src)); 6659 format %{ "addpd $dst,$src\t! add packed2D" %} 6660 ins_encode %{ 6661 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 6662 %} 6663 ins_pipe( pipe_slow ); 6664 %} 6665 6666 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 6667 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6668 match(Set dst (AddVD src1 src2)); 6669 format %{ "vaddpd $dst,$src1,$src2\t! 
add packed2D" %} 6670 ins_encode %{ 6671 int vector_len = 0; 6672 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6673 %} 6674 ins_pipe( pipe_slow ); 6675 %} 6676 6677 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 6678 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6679 match(Set dst (AddVD src (LoadVector mem))); 6680 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 6681 ins_encode %{ 6682 int vector_len = 0; 6683 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6684 %} 6685 ins_pipe( pipe_slow ); 6686 %} 6687 6688 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 6689 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6690 match(Set dst (AddVD src1 src2)); 6691 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 6692 ins_encode %{ 6693 int vector_len = 1; 6694 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6695 %} 6696 ins_pipe( pipe_slow ); 6697 %} 6698 6699 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 6700 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6701 match(Set dst (AddVD src (LoadVector mem))); 6702 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 6703 ins_encode %{ 6704 int vector_len = 1; 6705 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6706 %} 6707 ins_pipe( pipe_slow ); 6708 %} 6709 6710 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6711 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6712 match(Set dst (AddVD src1 src2)); 6713 format %{ "vaddpd $dst,$src1,$src2\t! 
add packed8D" %} 6714 ins_encode %{ 6715 int vector_len = 2; 6716 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6717 %} 6718 ins_pipe( pipe_slow ); 6719 %} 6720 6721 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 6722 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6723 match(Set dst (AddVD src (LoadVector mem))); 6724 format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} 6725 ins_encode %{ 6726 int vector_len = 2; 6727 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6728 %} 6729 ins_pipe( pipe_slow ); 6730 %} 6731 6732 // --------------------------------- SUB -------------------------------------- 6733 6734 // Bytes vector sub 6735 instruct vsub4B(vecS dst, vecS src) %{ 6736 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6737 match(Set dst (SubVB dst src)); 6738 format %{ "psubb $dst,$src\t! sub packed4B" %} 6739 ins_encode %{ 6740 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6741 %} 6742 ins_pipe( pipe_slow ); 6743 %} 6744 6745 instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 6746 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6747 match(Set dst (SubVB src1 src2)); 6748 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6749 ins_encode %{ 6750 int vector_len = 0; 6751 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6752 %} 6753 ins_pipe( pipe_slow ); 6754 %} 6755 6756 instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 6757 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6758 match(Set dst (SubVB src1 src2)); 6759 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed4B" %} 6760 ins_encode %{ 6761 int vector_len = 0; 6762 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6763 %} 6764 ins_pipe( pipe_slow ); 6765 %} 6766 6767 instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 6768 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6769 match(Set dst (SubVB dst src2)); 6770 effect(TEMP src1); 6771 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6772 ins_encode %{ 6773 int vector_len = 0; 6774 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6775 %} 6776 ins_pipe( pipe_slow ); 6777 %} 6778 6779 instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{ 6780 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6781 match(Set dst (SubVB src (LoadVector mem))); 6782 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6783 ins_encode %{ 6784 int vector_len = 0; 6785 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6786 %} 6787 ins_pipe( pipe_slow ); 6788 %} 6789 6790 instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{ 6791 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6792 match(Set dst (SubVB src (LoadVector mem))); 6793 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6794 ins_encode %{ 6795 int vector_len = 0; 6796 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6797 %} 6798 ins_pipe( pipe_slow ); 6799 %} 6800 6801 instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 6802 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 6803 match(Set dst (SubVB dst (LoadVector mem))); 6804 effect(TEMP src); 6805 format %{ "vpsubb $dst,$src,$mem\t!
sub packed4B" %} 6806 ins_encode %{ 6807 int vector_len = 0; 6808 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6809 %} 6810 ins_pipe( pipe_slow ); 6811 %} 6812 6813 instruct vsub8B(vecD dst, vecD src) %{ 6814 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6815 match(Set dst (SubVB dst src)); 6816 format %{ "psubb $dst,$src\t! sub packed8B" %} 6817 ins_encode %{ 6818 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6819 %} 6820 ins_pipe( pipe_slow ); 6821 %} 6822 6823 instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 6824 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6825 match(Set dst (SubVB src1 src2)); 6826 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6827 ins_encode %{ 6828 int vector_len = 0; 6829 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6830 %} 6831 ins_pipe( pipe_slow ); 6832 %} 6833 6834 instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 6835 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6836 match(Set dst (SubVB src1 src2)); 6837 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6838 ins_encode %{ 6839 int vector_len = 0; 6840 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6841 %} 6842 ins_pipe( pipe_slow ); 6843 %} 6844 6845 instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 6846 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6847 match(Set dst (SubVB dst src2)); 6848 effect(TEMP src1); 6849 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed8B" %} 6850 ins_encode %{ 6851 int vector_len = 0; 6852 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6853 %} 6854 ins_pipe( pipe_slow ); 6855 %} 6856 6857 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ 6858 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6859 match(Set dst (SubVB src (LoadVector mem))); 6860 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6861 ins_encode %{ 6862 int vector_len = 0; 6863 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6864 %} 6865 ins_pipe( pipe_slow ); 6866 %} 6867 6868 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ 6869 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6870 match(Set dst (SubVB src (LoadVector mem))); 6871 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6872 ins_encode %{ 6873 int vector_len = 0; 6874 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6875 %} 6876 ins_pipe( pipe_slow ); 6877 %} 6878 6879 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 6880 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6881 match(Set dst (SubVB dst (LoadVector mem))); 6882 effect(TEMP src); 6883 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6884 ins_encode %{ 6885 int vector_len = 0; 6886 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6887 %} 6888 ins_pipe( pipe_slow ); 6889 %} 6890 6891 instruct vsub16B(vecX dst, vecX src) %{ 6892 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 6893 match(Set dst (SubVB dst src)); 6894 format %{ "psubb $dst,$src\t! 
sub packed16B" %} 6895 ins_encode %{ 6896 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6897 %} 6898 ins_pipe( pipe_slow ); 6899 %} 6900 6901 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 6902 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 6903 match(Set dst (SubVB src1 src2)); 6904 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6905 ins_encode %{ 6906 int vector_len = 0; 6907 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6908 %} 6909 ins_pipe( pipe_slow ); 6910 %} 6911 6912 instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 6913 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6914 match(Set dst (SubVB src1 src2)); 6915 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6916 ins_encode %{ 6917 int vector_len = 0; 6918 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6919 %} 6920 ins_pipe( pipe_slow ); 6921 %} 6922 6923 instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 6924 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6925 match(Set dst (SubVB dst src2)); 6926 effect(TEMP src1); 6927 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6928 ins_encode %{ 6929 int vector_len = 0; 6930 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6931 %} 6932 ins_pipe( pipe_slow ); 6933 %} 6934 6935 instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ 6936 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 6937 match(Set dst (SubVB src (LoadVector mem))); 6938 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed16B" %} 6939 ins_encode %{ 6940 int vector_len = 0; 6941 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6942 %} 6943 ins_pipe( pipe_slow ); 6944 %} 6945 6946 instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ 6947 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6948 match(Set dst (SubVB src (LoadVector mem))); 6949 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6950 ins_encode %{ 6951 int vector_len = 0; 6952 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6953 %} 6954 ins_pipe( pipe_slow ); 6955 %} 6956 6957 instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 6958 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 6959 match(Set dst (SubVB dst (LoadVector mem))); 6960 effect(TEMP src); 6961 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6962 ins_encode %{ 6963 int vector_len = 0; 6964 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6965 %} 6966 ins_pipe( pipe_slow ); 6967 %} 6968 6969 instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 6970 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 6971 match(Set dst (SubVB src1 src2)); 6972 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6973 ins_encode %{ 6974 int vector_len = 1; 6975 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6976 %} 6977 ins_pipe( pipe_slow ); 6978 %} 6979 6980 instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 6981 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6982 match(Set dst (SubVB src1 src2)); 6983 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed32B" %} 6984 ins_encode %{ 6985 int vector_len = 1; 6986 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6987 %} 6988 ins_pipe( pipe_slow ); 6989 %} 6990 6991 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 6992 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 6993 match(Set dst (SubVB dst src2)); 6994 effect(TEMP src1); 6995 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6996 ins_encode %{ 6997 int vector_len = 1; 6998 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6999 %} 7000 ins_pipe( pipe_slow ); 7001 %} 7002 7003 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ 7004 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 7005 match(Set dst (SubVB src (LoadVector mem))); 7006 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7007 ins_encode %{ 7008 int vector_len = 1; 7009 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7010 %} 7011 ins_pipe( pipe_slow ); 7012 %} 7013 7014 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ 7015 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7016 match(Set dst (SubVB src (LoadVector mem))); 7017 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7018 ins_encode %{ 7019 int vector_len = 1; 7020 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7021 %} 7022 ins_pipe( pipe_slow ); 7023 %} 7024 7025 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7026 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 7027 match(Set dst (SubVB dst (LoadVector mem))); 7028 effect(TEMP src); 7029 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed32B" %} 7030 ins_encode %{ 7031 int vector_len = 1; 7032 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7033 %} 7034 ins_pipe( pipe_slow ); 7035 %} 7036 7037 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7038 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 7039 match(Set dst (SubVB src1 src2)); 7040 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 7041 ins_encode %{ 7042 int vector_len = 2; 7043 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7044 %} 7045 ins_pipe( pipe_slow ); 7046 %} 7047 7048 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 7049 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 7050 match(Set dst (SubVB src (LoadVector mem))); 7051 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 7052 ins_encode %{ 7053 int vector_len = 2; 7054 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7055 %} 7056 ins_pipe( pipe_slow ); 7057 %} 7058 7059 // Shorts/Chars vector sub 7060 instruct vsub2S(vecS dst, vecS src) %{ 7061 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7062 match(Set dst (SubVS dst src)); 7063 format %{ "psubw $dst,$src\t! sub packed2S" %} 7064 ins_encode %{ 7065 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7066 %} 7067 ins_pipe( pipe_slow ); 7068 %} 7069 7070 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7071 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7072 match(Set dst (SubVS src1 src2)); 7073 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed2S" %} 7074 ins_encode %{ 7075 int vector_len = 0; 7076 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7077 %} 7078 ins_pipe( pipe_slow ); 7079 %} 7080 7081 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7082 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7083 match(Set dst (SubVS src1 src2)); 7084 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7085 ins_encode %{ 7086 int vector_len = 0; 7087 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7088 %} 7089 ins_pipe( pipe_slow ); 7090 %} 7091 7092 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 7093 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7094 match(Set dst (SubVS dst src2)); 7095 effect(TEMP src1); 7096 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7097 ins_encode %{ 7098 int vector_len = 0; 7099 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7100 %} 7101 ins_pipe( pipe_slow ); 7102 %} 7103 7104 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7105 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7106 match(Set dst (SubVS src (LoadVector mem))); 7107 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7108 ins_encode %{ 7109 int vector_len = 0; 7110 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7111 %} 7112 ins_pipe( pipe_slow ); 7113 %} 7114 7115 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7116 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7117 match(Set dst (SubVS src (LoadVector mem))); 7118 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed2S" %} 7119 ins_encode %{ 7120 int vector_len = 0; 7121 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7122 %} 7123 ins_pipe( pipe_slow ); 7124 %} 7125 7126 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7127 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7128 match(Set dst (SubVS dst (LoadVector mem))); 7129 effect(TEMP src); 7130 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7131 ins_encode %{ 7132 int vector_len = 0; 7133 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7134 %} 7135 ins_pipe( pipe_slow ); 7136 %} 7137 7138 instruct vsub4S(vecD dst, vecD src) %{ 7139 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7140 match(Set dst (SubVS dst src)); 7141 format %{ "psubw $dst,$src\t! sub packed4S" %} 7142 ins_encode %{ 7143 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7144 %} 7145 ins_pipe( pipe_slow ); 7146 %} 7147 7148 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7149 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7150 match(Set dst (SubVS src1 src2)); 7151 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7152 ins_encode %{ 7153 int vector_len = 0; 7154 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7155 %} 7156 ins_pipe( pipe_slow ); 7157 %} 7158 7159 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7160 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7161 match(Set dst (SubVS src1 src2)); 7162 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 7163 ins_encode %{ 7164 int vector_len = 0; 7165 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7166 %} 7167 ins_pipe( pipe_slow ); 7168 %} 7169 7170 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7171 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7172 match(Set dst (SubVS dst src2)); 7173 effect(TEMP src1); 7174 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7175 ins_encode %{ 7176 int vector_len = 0; 7177 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7178 %} 7179 ins_pipe( pipe_slow ); 7180 %} 7181 7182 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7183 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7184 match(Set dst (SubVS src (LoadVector mem))); 7185 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7186 ins_encode %{ 7187 int vector_len = 0; 7188 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7189 %} 7190 ins_pipe( pipe_slow ); 7191 %} 7192 7193 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7194 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7195 match(Set dst (SubVS src (LoadVector mem))); 7196 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7197 ins_encode %{ 7198 int vector_len = 0; 7199 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7200 %} 7201 ins_pipe( pipe_slow ); 7202 %} 7203 7204 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7205 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7206 match(Set dst (SubVS dst (LoadVector mem))); 7207 effect(TEMP src); 7208 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed4S" %} 7209 ins_encode %{ 7210 int vector_len = 0; 7211 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7212 %} 7213 ins_pipe( pipe_slow ); 7214 %} 7215 7216 instruct vsub8S(vecX dst, vecX src) %{ 7217 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7218 match(Set dst (SubVS dst src)); 7219 format %{ "psubw $dst,$src\t! sub packed8S" %} 7220 ins_encode %{ 7221 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7222 %} 7223 ins_pipe( pipe_slow ); 7224 %} 7225 7226 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7227 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7228 match(Set dst (SubVS src1 src2)); 7229 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7230 ins_encode %{ 7231 int vector_len = 0; 7232 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7233 %} 7234 ins_pipe( pipe_slow ); 7235 %} 7236 7237 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7238 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7239 match(Set dst (SubVS src1 src2)); 7240 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7241 ins_encode %{ 7242 int vector_len = 0; 7243 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7244 %} 7245 ins_pipe( pipe_slow ); 7246 %} 7247 7248 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7249 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7250 match(Set dst (SubVS dst src2)); 7251 effect(TEMP src1); 7252 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed8S" %} 7253 ins_encode %{ 7254 int vector_len = 0; 7255 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7256 %} 7257 ins_pipe( pipe_slow ); 7258 %} 7259 7260 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7261 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7262 match(Set dst (SubVS src (LoadVector mem))); 7263 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7264 ins_encode %{ 7265 int vector_len = 0; 7266 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7267 %} 7268 ins_pipe( pipe_slow ); 7269 %} 7270 7271 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7272 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7273 match(Set dst (SubVS src (LoadVector mem))); 7274 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7275 ins_encode %{ 7276 int vector_len = 0; 7277 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7278 %} 7279 ins_pipe( pipe_slow ); 7280 %} 7281 7282 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7283 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7284 match(Set dst (SubVS dst (LoadVector mem))); 7285 effect(TEMP src); 7286 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7287 ins_encode %{ 7288 int vector_len = 0; 7289 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7290 %} 7291 ins_pipe( pipe_slow ); 7292 %} 7293 7294 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7295 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7296 match(Set dst (SubVS src1 src2)); 7297 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed16S" %} 7298 ins_encode %{ 7299 int vector_len = 1; 7300 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7301 %} 7302 ins_pipe( pipe_slow ); 7303 %} 7304 7305 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7306 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7307 match(Set dst (SubVS src1 src2)); 7308 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7309 ins_encode %{ 7310 int vector_len = 1; 7311 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7312 %} 7313 ins_pipe( pipe_slow ); 7314 %} 7315 7316 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7317 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7318 match(Set dst (SubVS dst src2)); 7319 effect(TEMP src1); 7320 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7321 ins_encode %{ 7322 int vector_len = 1; 7323 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7324 %} 7325 ins_pipe( pipe_slow ); 7326 %} 7327 7328 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7329 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7330 match(Set dst (SubVS src (LoadVector mem))); 7331 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7332 ins_encode %{ 7333 int vector_len = 1; 7334 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7335 %} 7336 ins_pipe( pipe_slow ); 7337 %} 7338 7339 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7340 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7341 match(Set dst (SubVS src (LoadVector mem))); 7342 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 7343 ins_encode %{ 7344 int vector_len = 1; 7345 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7346 %} 7347 ins_pipe( pipe_slow ); 7348 %} 7349 7350 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7351 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7352 match(Set dst (SubVS dst (LoadVector mem))); 7353 effect(TEMP src); 7354 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7355 ins_encode %{ 7356 int vector_len = 1; 7357 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7358 %} 7359 ins_pipe( pipe_slow ); 7360 %} 7361 7362 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7363 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7364 match(Set dst (SubVS src1 src2)); 7365 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 7366 ins_encode %{ 7367 int vector_len = 2; 7368 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7369 %} 7370 ins_pipe( pipe_slow ); 7371 %} 7372 7373 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 7374 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7375 match(Set dst (SubVS src (LoadVector mem))); 7376 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 7377 ins_encode %{ 7378 int vector_len = 2; 7379 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7380 %} 7381 ins_pipe( pipe_slow ); 7382 %} 7383 7384 // Integers vector sub 7385 instruct vsub2I(vecD dst, vecD src) %{ 7386 predicate(n->as_Vector()->length() == 2); 7387 match(Set dst (SubVI dst src)); 7388 format %{ "psubd $dst,$src\t! sub packed2I" %} 7389 ins_encode %{ 7390 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7391 %} 7392 ins_pipe( pipe_slow ); 7393 %} 7394 7703 match(Set dst (SubVD src (LoadVector mem))); 7704 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed2D" %} 7705 ins_encode %{ 7706 int vector_len = 0; 7707 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7708 %} 7709 ins_pipe( pipe_slow ); 7710 %} 7711 7712 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7713 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7714 match(Set dst (SubVD src1 src2)); 7715 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7716 ins_encode %{ 7717 int vector_len = 1; 7718 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7719 %} 7720 ins_pipe( pipe_slow ); 7721 %} 7722 7723 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7724 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7725 match(Set dst (SubVD src (LoadVector mem))); 7726 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7727 ins_encode %{ 7728 int vector_len = 1; 7729 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7730 %} 7731 ins_pipe( pipe_slow ); 7732 %} 7733 7734 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7735 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7736 match(Set dst (SubVD src1 src2)); 7737 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7738 ins_encode %{ 7739 int vector_len = 2; 7740 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7741 %} 7742 ins_pipe( pipe_slow ); 7743 %} 7744 7745 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7746 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7747 match(Set dst (SubVD src (LoadVector mem))); 7748 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed8D" %} 7749 ins_encode %{ 7750 int vector_len = 2; 7751 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7752 %} 7753 ins_pipe( pipe_slow ); 7754 %} 7755 7756 // --------------------------------- MUL -------------------------------------- 7757 7758 // Shorts/Chars vector mul 7759 instruct vmul2S(vecS dst, vecS src) %{ 7760 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7761 match(Set dst (MulVS dst src)); 7762 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7763 ins_encode %{ 7764 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7765 %} 7766 ins_pipe( pipe_slow ); 7767 %} 7768 7769 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7770 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7771 match(Set dst (MulVS src1 src2)); 7772 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7773 ins_encode %{ 7774 int vector_len = 0; 7775 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7776 %} 7777 ins_pipe( pipe_slow ); 7778 %} 7779 7780 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7781 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7782 match(Set dst (MulVS src1 src2)); 7783 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7784 ins_encode %{ 7785 int vector_len = 0; 7786 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7787 %} 7788 ins_pipe( pipe_slow ); 7789 %} 7790 7791 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 7792 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7793 match(Set dst (MulVS dst src2)); 7794 effect(TEMP src1); 7795 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed2S" %} 7796 ins_encode %{ 7797 int vector_len = 0; 7798 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7799 %} 7800 ins_pipe( pipe_slow ); 7801 %} 7802 7803 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7804 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7805 match(Set dst (MulVS src (LoadVector mem))); 7806 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7807 ins_encode %{ 7808 int vector_len = 0; 7809 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7810 %} 7811 ins_pipe( pipe_slow ); 7812 %} 7813 7814 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7815 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7816 match(Set dst (MulVS src (LoadVector mem))); 7817 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7818 ins_encode %{ 7819 int vector_len = 0; 7820 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7821 %} 7822 ins_pipe( pipe_slow ); 7823 %} 7824 7825 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7826 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7827 match(Set dst (MulVS dst (LoadVector mem))); 7828 effect(TEMP src); 7829 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7830 ins_encode %{ 7831 int vector_len = 0; 7832 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7833 %} 7834 ins_pipe( pipe_slow ); 7835 %} 7836 7837 instruct vmul4S(vecD dst, vecD src) %{ 7838 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7839 match(Set dst (MulVS dst src)); 7840 format %{ "pmullw $dst,$src\t! 
mul packed4S" %} 7841 ins_encode %{ 7842 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7843 %} 7844 ins_pipe( pipe_slow ); 7845 %} 7846 7847 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7848 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7849 match(Set dst (MulVS src1 src2)); 7850 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7851 ins_encode %{ 7852 int vector_len = 0; 7853 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7854 %} 7855 ins_pipe( pipe_slow ); 7856 %} 7857 7858 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7859 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7860 match(Set dst (MulVS src1 src2)); 7861 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7862 ins_encode %{ 7863 int vector_len = 0; 7864 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7865 %} 7866 ins_pipe( pipe_slow ); 7867 %} 7868 7869 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7870 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7871 match(Set dst (MulVS dst src2)); 7872 effect(TEMP src1); 7873 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7874 ins_encode %{ 7875 int vector_len = 0; 7876 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7877 %} 7878 ins_pipe( pipe_slow ); 7879 %} 7880 7881 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7882 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7883 match(Set dst (MulVS src (LoadVector mem))); 7884 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} 7885 ins_encode %{ 7886 int vector_len = 0; 7887 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7888 %} 7889 ins_pipe( pipe_slow ); 7890 %} 7891 7892 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7893 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7894 match(Set dst (MulVS src (LoadVector mem))); 7895 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7896 ins_encode %{ 7897 int vector_len = 0; 7898 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7899 %} 7900 ins_pipe( pipe_slow ); 7901 %} 7902 7903 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7904 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7905 match(Set dst (MulVS dst (LoadVector mem))); 7906 effect(TEMP src); 7907 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7908 ins_encode %{ 7909 int vector_len = 0; 7910 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7911 %} 7912 ins_pipe( pipe_slow ); 7913 %} 7914 7915 instruct vmul8S(vecX dst, vecX src) %{ 7916 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7917 match(Set dst (MulVS dst src)); 7918 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7919 ins_encode %{ 7920 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7921 %} 7922 ins_pipe( pipe_slow ); 7923 %} 7924 7925 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7926 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7927 match(Set dst (MulVS src1 src2)); 7928 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 7929 ins_encode %{ 7930 int vector_len = 0; 7931 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7932 %} 7933 ins_pipe( pipe_slow ); 7934 %} 7935 7936 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7937 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7938 match(Set dst (MulVS src1 src2)); 7939 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7940 ins_encode %{ 7941 int vector_len = 0; 7942 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7943 %} 7944 ins_pipe( pipe_slow ); 7945 %} 7946 7947 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7948 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7949 match(Set dst (MulVS dst src2)); 7950 effect(TEMP src1); 7951 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7952 ins_encode %{ 7953 int vector_len = 0; 7954 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7955 %} 7956 ins_pipe( pipe_slow ); 7957 %} 7958 7959 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7960 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7961 match(Set dst (MulVS src (LoadVector mem))); 7962 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7963 ins_encode %{ 7964 int vector_len = 0; 7965 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7966 %} 7967 ins_pipe( pipe_slow ); 7968 %} 7969 7970 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7971 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7972 match(Set dst (MulVS src (LoadVector mem))); 7973 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed8S" %} 7974 ins_encode %{ 7975 int vector_len = 0; 7976 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7977 %} 7978 ins_pipe( pipe_slow ); 7979 %} 7980 7981 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7982 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7983 match(Set dst (MulVS dst (LoadVector mem))); 7984 effect(TEMP src); 7985 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7986 ins_encode %{ 7987 int vector_len = 0; 7988 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7989 %} 7990 ins_pipe( pipe_slow ); 7991 %} 7992 7993 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7994 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7995 match(Set dst (MulVS src1 src2)); 7996 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7997 ins_encode %{ 7998 int vector_len = 1; 7999 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8000 %} 8001 ins_pipe( pipe_slow ); 8002 %} 8003 8004 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 8005 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8006 match(Set dst (MulVS src1 src2)); 8007 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8008 ins_encode %{ 8009 int vector_len = 1; 8010 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8011 %} 8012 ins_pipe( pipe_slow ); 8013 %} 8014 8015 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 8016 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8017 match(Set dst (MulVS dst src2)); 8018 effect(TEMP src1); 8019 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %} 8020 ins_encode %{ 8021 int vector_len = 1; 8022 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8023 %} 8024 ins_pipe( pipe_slow ); 8025 %} 8026 8027 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 8028 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8029 match(Set dst (MulVS src (LoadVector mem))); 8030 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8031 ins_encode %{ 8032 int vector_len = 1; 8033 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8034 %} 8035 ins_pipe( pipe_slow ); 8036 %} 8037 8038 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 8039 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8040 match(Set dst (MulVS src (LoadVector mem))); 8041 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8042 ins_encode %{ 8043 int vector_len = 1; 8044 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8045 %} 8046 ins_pipe( pipe_slow ); 8047 %} 8048 8049 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 8050 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8051 match(Set dst (MulVS dst (LoadVector mem))); 8052 effect(TEMP src); 8053 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8054 ins_encode %{ 8055 int vector_len = 1; 8056 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8057 %} 8058 ins_pipe( pipe_slow ); 8059 %} 8060 8061 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8062 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8063 match(Set dst (MulVS src1 src2)); 8064 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed32S" %} 8065 ins_encode %{ 8066 int vector_len = 2; 8067 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8068 %} 8069 ins_pipe( pipe_slow ); 8070 %} 8071 8072 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 8073 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8074 match(Set dst (MulVS src (LoadVector mem))); 8075 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 8076 ins_encode %{ 8077 int vector_len = 2; 8078 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8079 %} 8080 ins_pipe( pipe_slow ); 8081 %} 8082 8083 // Integers vector mul (sse4_1) 8084 instruct vmul2I(vecD dst, vecD src) %{ 8085 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 8086 match(Set dst (MulVI dst src)); 8087 format %{ "pmulld $dst,$src\t! mul packed2I" %} 8088 ins_encode %{ 8089 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 8090 %} 8091 ins_pipe( pipe_slow ); 8092 %} 8093 8683 %} 8684 ins_pipe( pipe_slow ); 8685 %} 8686 8687 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8688 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8689 match(Set dst (SqrtVD src)); 8690 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 8691 ins_encode %{ 8692 int vector_len = 1; 8693 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8694 %} 8695 ins_pipe( pipe_slow ); 8696 %} 8697 8698 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8699 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8700 match(Set dst (SqrtVD (LoadVector mem))); 8701 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8702 ins_encode %{ 8703 int vector_len = 1; 8704 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8705 %} 8706 ins_pipe( pipe_slow ); 8707 %} 8708 8709 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8710 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8711 match(Set dst (SqrtVD src)); 8712 format %{ "vsqrtpd $dst,$src\t! 
sqrt packed8D" %} 8713 ins_encode %{ 8714 int vector_len = 2; 8715 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8716 %} 8717 ins_pipe( pipe_slow ); 8718 %} 8719 8720 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8721 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8722 match(Set dst (SqrtVD (LoadVector mem))); 8723 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 8724 ins_encode %{ 8725 int vector_len = 2; 8726 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8727 %} 8728 ins_pipe( pipe_slow ); 8729 %} 8730 8731 // ------------------------------ LeftShift ----------------------------------- 8732 8733 // Shorts/Chars vector left shift 8734 instruct vsll2S(vecS dst, vecS shift) %{ 8735 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8736 match(Set dst (LShiftVS dst shift)); 8737 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8738 ins_encode %{ 8739 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8740 %} 8741 ins_pipe( pipe_slow ); 8742 %} 8743 8744 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8745 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8746 match(Set dst (LShiftVS dst shift)); 8747 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8748 ins_encode %{ 8749 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8750 %} 8751 ins_pipe( pipe_slow ); 8752 %} 8753 8754 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 8755 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8756 match(Set dst (LShiftVS src shift)); 8757 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} 8758 ins_encode %{ 8759 int vector_len = 0; 8760 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8761 %} 8762 ins_pipe( pipe_slow ); 8763 %} 8764 8765 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 8766 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8767 match(Set dst (LShiftVS src shift)); 8768 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8769 ins_encode %{ 8770 int vector_len = 0; 8771 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8772 %} 8773 ins_pipe( pipe_slow ); 8774 %} 8775 8776 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 8777 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8778 match(Set dst (LShiftVS dst shift)); 8779 effect(TEMP src); 8780 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8781 ins_encode %{ 8782 int vector_len = 0; 8783 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8784 %} 8785 ins_pipe( pipe_slow ); 8786 %} 8787 8788 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 8789 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8790 match(Set dst (LShiftVS src shift)); 8791 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8792 ins_encode %{ 8793 int vector_len = 0; 8794 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8795 %} 8796 ins_pipe( pipe_slow ); 8797 %} 8798 8799 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 8800 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8801 match(Set dst (LShiftVS src shift)); 8802 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} 8803 ins_encode %{ 8804 int vector_len = 0; 8805 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8806 %} 8807 ins_pipe( pipe_slow ); 8808 %} 8809 8810 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 8811 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8812 match(Set dst (LShiftVS dst shift)); 8813 effect(TEMP src); 8814 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8815 ins_encode %{ 8816 int vector_len = 0; 8817 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8818 %} 8819 ins_pipe( pipe_slow ); 8820 %} 8821 8822 instruct vsll4S(vecD dst, vecS shift) %{ 8823 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8824 match(Set dst (LShiftVS dst shift)); 8825 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8826 ins_encode %{ 8827 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8828 %} 8829 ins_pipe( pipe_slow ); 8830 %} 8831 8832 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8833 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8834 match(Set dst (LShiftVS dst shift)); 8835 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8836 ins_encode %{ 8837 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8838 %} 8839 ins_pipe( pipe_slow ); 8840 %} 8841 8842 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 8843 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8844 match(Set dst (LShiftVS src shift)); 8845 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 8846 ins_encode %{ 8847 int vector_len = 0; 8848 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8849 %} 8850 ins_pipe( pipe_slow ); 8851 %} 8852 8853 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 8854 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 8855 match(Set dst (LShiftVS src shift)); 8856 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8857 ins_encode %{ 8858 int vector_len = 0; 8859 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8860 %} 8861 ins_pipe( pipe_slow ); 8862 %} 8863 8864 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 8865 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 8866 match(Set dst (LShiftVS dst shift)); 8867 effect(TEMP src); 8868 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8869 ins_encode %{ 8870 int vector_len = 0; 8871 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8872 %} 8873 ins_pipe( pipe_slow ); 8874 %} 8875 8876 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 8877 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8878 match(Set dst (LShiftVS src shift)); 8879 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8880 ins_encode %{ 8881 int vector_len = 0; 8882 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8883 %} 8884 ins_pipe( pipe_slow ); 8885 %} 8886 8887 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 8888 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 8889 match(Set dst (LShiftVS src shift)); 8890 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 8891 ins_encode %{ 8892 int vector_len = 0; 8893 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8894 %} 8895 ins_pipe( pipe_slow ); 8896 %} 8897 8898 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 8899 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 8900 match(Set dst (LShiftVS dst shift)); 8901 effect(TEMP src); 8902 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8903 ins_encode %{ 8904 int vector_len = 0; 8905 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8906 %} 8907 ins_pipe( pipe_slow ); 8908 %} 8909 8910 instruct vsll8S(vecX dst, vecS shift) %{ 8911 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8912 match(Set dst (LShiftVS dst shift)); 8913 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8914 ins_encode %{ 8915 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8916 %} 8917 ins_pipe( pipe_slow ); 8918 %} 8919 8920 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 8921 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8922 match(Set dst (LShiftVS dst shift)); 8923 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8924 ins_encode %{ 8925 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8926 %} 8927 ins_pipe( pipe_slow ); 8928 %} 8929 8930 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 8931 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8932 match(Set dst (LShiftVS src shift)); 8933 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 8934 ins_encode %{ 8935 int vector_len = 0; 8936 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8937 %} 8938 ins_pipe( pipe_slow ); 8939 %} 8940 8941 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 8942 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8943 match(Set dst (LShiftVS src shift)); 8944 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8945 ins_encode %{ 8946 int vector_len = 0; 8947 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8948 %} 8949 ins_pipe( pipe_slow ); 8950 %} 8951 8952 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 8953 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8954 match(Set dst (LShiftVS dst shift)); 8955 effect(TEMP src); 8956 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8957 ins_encode %{ 8958 int vector_len = 0; 8959 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8960 %} 8961 ins_pipe( pipe_slow ); 8962 %} 8963 8964 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 8965 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8966 match(Set dst (LShiftVS src shift)); 8967 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8968 ins_encode %{ 8969 int vector_len = 0; 8970 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8971 %} 8972 ins_pipe( pipe_slow ); 8973 %} 8974 8975 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 8976 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8977 match(Set dst (LShiftVS src shift)); 8978 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 8979 ins_encode %{ 8980 int vector_len = 0; 8981 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8982 %} 8983 ins_pipe( pipe_slow ); 8984 %} 8985 8986 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 8987 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8988 match(Set dst (LShiftVS dst shift)); 8989 effect(TEMP src); 8990 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8991 ins_encode %{ 8992 int vector_len = 0; 8993 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8994 %} 8995 ins_pipe( pipe_slow ); 8996 %} 8997 8998 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 8999 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9000 match(Set dst (LShiftVS src shift)); 9001 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9002 ins_encode %{ 9003 int vector_len = 1; 9004 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9005 %} 9006 ins_pipe( pipe_slow ); 9007 %} 9008 9009 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9010 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9011 match(Set dst (LShiftVS src shift)); 9012 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9013 ins_encode %{ 9014 int vector_len = 1; 9015 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9016 %} 9017 ins_pipe( pipe_slow ); 9018 %} 9019 9020 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9021 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9022 match(Set dst (LShiftVS dst shift)); 9023 effect(TEMP src); 9024 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 9025 ins_encode %{ 9026 int vector_len = 1; 9027 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9028 %} 9029 ins_pipe( pipe_slow ); 9030 %} 9031 9032 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9033 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9034 match(Set dst (LShiftVS src shift)); 9035 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9036 ins_encode %{ 9037 int vector_len = 1; 9038 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9039 %} 9040 ins_pipe( pipe_slow ); 9041 %} 9042 9043 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9044 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9045 match(Set dst (LShiftVS src shift)); 9046 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9047 ins_encode %{ 9048 int vector_len = 1; 9049 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9050 %} 9051 ins_pipe( pipe_slow ); 9052 %} 9053 9054 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9055 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9056 match(Set dst (LShiftVS dst shift)); 9057 effect(TEMP src); 9058 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9059 ins_encode %{ 9060 int vector_len = 1; 9061 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9062 %} 9063 ins_pipe( pipe_slow ); 9064 %} 9065 9066 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9067 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9068 match(Set dst (LShiftVS src shift)); 9069 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit left shift of 32 shorts by an immediate count (requires AVX512BW).
instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
// SSE two-operand form: dst is shifted in place by the xmm-held count.
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

  %}
  ins_pipe( pipe_slow );
%}

// 256-bit left shift of 4 longs by an immediate count (AVX2).
instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit left shift of 8 longs by a vector-held count (UseAVX > 2).
instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.

// SSE-only (UseAVX == 0) two-operand form: dst is shifted in place.
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX1/AVX2 (no AVX512) three-operand form.
instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512 without BW: matches the in-place (dst, dst) form; src is only a TEMP.
instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (vector_len = 1) forms are gated on supports_avx256only() / AVX512BW.
instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit (vector_len = 2) short shifts require AVX512BW.
instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE-only arithmetic right shift of 2 shorts by an immediate.
// Fix: the predicate was missing the UseAVX == 0 guard that every sibling
// two-operand SSE rule carries (vsra2S, vsrl2S_imm, vsra4S_imm, vsra8S_imm);
// without it this in-place SSE form competes with the three-operand
// AVX/EVEX rules below on AVX-capable machines.
instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX1/AVX2 (no AVX512) three-operand form.
instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX form (AVX512BW).
instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512 without BW: matches the in-place (dst, dst) form; src is only a TEMP.
instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512 without BW: in-place (dst, dst) form; src is only a TEMP.
instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// SSE-only (UseAVX == 0) two-operand forms for 4 shorts.
instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// SSE-only (UseAVX == 0) two-operand forms for 8 shorts.
instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (vector_len = 1) form, gated on supports_avx256only().
instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX form (AVX512BW), 256-bit vector.
instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512 without BW: matches the in-place (dst, dst) form; src is only a TEMP.
instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Fix: the predicate was supports_avxonly(), which also admits AVX1-only
// machines. This rule emits a 256-bit (vector_len = 1) integer vpsraw,
// which needs AVX2, so it must be gated on supports_avx256only() — the same
// gate used by vsra16S_reg_avx and vsrl16S_reg_imm_avx above.
instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512 without BW: in-place (dst, dst) form; src is only a TEMP.
instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit (vector_len = 2) short shifts require AVX512BW.
instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
// SSE two-operand form: dst is shifted in place by the xmm-held count.
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}