      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
    case Op_MulAddVS2VI:
      if (UseSSE < 2)
        ret_value = false;
      break;
  }

  return ret_value;  // Per default match rules are supported.
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
        // ...
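For context, this is the Java loop shape the new Op_MulAddVS2VI gate is about; a minimal sketch, assuming superword recognizes the adjacent-pair short multiply-accumulate idiom and turns it into MulAddVS2VI nodes that the pmaddwd/vpmaddwd rules further down can then match (the method and array names here are illustrative, not part of the patch):

  // Illustrative only: each out[i] sums two adjacent s16*s16 products into
  // one s32, which is exactly what pmaddwd computes per 32-bit lane.
  static void mulAddS2I(short[] s1, short[] s2, int[] out) {
    for (int i = 0; i < out.length; i++) {
      out[i] = s1[2 * i] * s2[2 * i] + s1[2 * i + 1] * s2[2 * i + 1];
    }
  }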
// ...

instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
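These two FmaVF forms correspond to Math.fma over float arrays; a minimal sketch, assuming -XX:+UseFMA and AVX-512 (vecZ) for the 16-lane variants:

  // Sketch: with -XX:+UseFMA, C2 can vectorize Math.fma into FmaVF nodes
  // matched by vfma16F_reg / vfma16F_mem above (single rounding of a*b + c).
  static void fmaLoop(float[] a, float[] b, float[] c) {
    for (int i = 0; i < c.length; i++) {
      c[i] = Math.fma(a[i], b[i], c[i]);
    }
  }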
// --------------------------------- Vector Multiply Add --------------------------------------

instruct smuladd4S2I_reg(vecD dst, vecD src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct smuladd8S4I_reg(vecX dst, vecX src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
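The Vector Multiply Add Add rules above fold the accumulate into the same pattern: AddVI (MulAddVS2VI src1 src2) dst is the pair-product idiom summed into a running int accumulator, which evpdpwssd performs in one instruction on AVX-512 VNNI hardware. A hedged sketch of the triggering loop (names illustrative again):

  // Illustrative only: the same pair products as before, but accumulated.
  // On supports_vnni() CPUs this is expected to collapse into evpdpwssd.
  static void mulAddAcc(short[] s1, short[] s2, int[] acc) {
    for (int i = 0; i < acc.length; i++) {
      acc[i] += s1[2 * i] * s2[2 * i] + s1[2 * i + 1] * s2[2 * i + 1];
    }
  }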
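And for the PopCount context rules above: PopCountVI is the vectorized form of Integer.bitCount, gated on AVX-512 VPOPCNTDQ via supports_vpopcntdq() plus UsePopCountInstruction. A minimal sketch:

  // Sketch: per-element bit counts; expected to vectorize to vpopcntd
  // only when the vpopcntdq predicate above holds.
  static void popCounts(int[] in, int[] out) {
    for (int i = 0; i < in.length; i++) {
      out[i] = Integer.bitCount(in[i]);
    }
  }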