// NOTE(review): This file appears to be a flattened side-by-side diff (left column
// "|" right column) of OpenJDK HotSpot's x86 architecture-description file
// (ADLC ".ad" source), with the original file's own line numbers (1433..., 2820...)
// embedded in the text. It is NOT compilable as-is; treat it as a diff artifact,
// not as live source. Do not edit code here -- recover the real .ad file instead.
//
// This line holds the LEFT (pre-change) column:
//  - the tail of the switch in Matcher::match_rule_supported(int opcode):
//    Op_CMoveVD requires 1 <= UseAVX <= 2; Op_StrIndexOf and Op_StrIndexOfChar
//    require UseSSE42Intrinsics; Op_OnSpinWait requires
//    VM_Version::supports_on_spin_wait(); Op_MulAddVS2VI requires UseSSE >= 2.
//    Unlisted opcodes fall through with ret_value unchanged ("per default match
//    rules are supported", per the return-statement comment in the text).
//  - the head of Matcher::match_rule_supported_vector(int opcode, int vlen),
//    which first delegates to match_rule_supported(opcode) and then (visible
//    here only up to Op_MulVS) applies vlen-dependent restrictions, e.g.
//    Op_AddVB/Op_SubVB with vlen == 64 need VM_Version::supports_avx512bw().
//  - ADL instructs fmaD_reg / fmaF_reg: gated on predicate(UseFMA), they match
//    (Set c (FmaD/FmaF c (Binary a b))) and emit __ fmad / __ fmaf computing
//    c = a * b + c, cost 150, pipe_slow.
//  - the start of instruct loadV4 -- cut mid-declaration at this line's end
//    ("instruct" here, "loadV4(...)" at the start of the next physical line).
1433 case Op_CMoveVD: 1434 if (UseAVX < 1 || UseAVX > 2) 1435 ret_value = false; 1436 break; 1437 case Op_StrIndexOf: 1438 if (!UseSSE42Intrinsics) 1439 ret_value = false; 1440 break; 1441 case Op_StrIndexOfChar: 1442 if (!UseSSE42Intrinsics) 1443 ret_value = false; 1444 break; 1445 case Op_OnSpinWait: 1446 if (VM_Version::supports_on_spin_wait() == false) 1447 ret_value = false; 1448 break; 1449 case Op_MulAddVS2VI: 1450 if (UseSSE < 2) 1451 ret_value = false; 1452 break; 1453 } 1454 1455 return ret_value; // Per default match rules are supported. 1456 } 1457 1458 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1459 // identify extra cases that we might want to provide match rules for 1460 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen 1461 bool ret_value = match_rule_supported(opcode); 1462 if (ret_value) { 1463 switch (opcode) { 1464 case Op_AddVB: 1465 case Op_SubVB: 1466 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1467 ret_value = false; 1468 break; 1469 case Op_URShiftVS: 1470 case Op_RShiftVS: 1471 case Op_LShiftVS: 1472 case Op_MulVS: 2820 // a * b + c 2821 instruct fmaD_reg(regD a, regD b, regD c) %{ 2822 predicate(UseFMA); 2823 match(Set c (FmaD c (Binary a b))); 2824 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2825 ins_cost(150); 2826 ins_encode %{ 2827 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2828 %} 2829 ins_pipe( pipe_slow ); 2830 %} 2831 2832 // a * b + c 2833 instruct fmaF_reg(regF a, regF b, regF c) %{ 2834 predicate(UseFMA); 2835 match(Set c (FmaF c (Binary a b))); 2836 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2837 ins_cost(150); 2838 ins_encode %{ 2839 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2840 %} 2841 ins_pipe( pipe_slow ); 2842 %} 2843 2844 // ====================VECTOR INSTRUCTIONS===================================== 2845 2846 2847 // Load vectors (4 bytes long) 2848 instruct 
// NOTE(review): Continuation of a flattened side-by-side diff of HotSpot's x86 .ad
// file (original line numbers embedded in the text; not compilable as-is).
//
// This span holds:
//  - the remainder of the LEFT column's instruct loadV4: predicated on
//    n->as_LoadVector()->memory_size() == 4, matches (Set dst (LoadVector mem))
//    and emits __ movdl to load 4 bytes into an XMM register, cost 125.
//  - the "|" column separator, then the RIGHT (post-change) column restarting at
//    the same match_rule_supported switch tail. The right column's visible delta:
//    new cases Op_MaxD / Op_MaxF / Op_MinD / Op_MinF that set ret_value = false
//    when UseAVX < 1 ("enabled for AVX only", per the in-text comment) -- i.e.
//    the float/double max/min match rules are AVX-gated.
//  - the right column's match_rule_supported_vector head and fmaD_reg/fmaF_reg
//    instructs, textually identical to the left column's (only the embedded line
//    numbers shift, 2820->2827 etc.).
//  - a pseudocode comment describing the max[FD]/min[FD] algorithm (swap on sign,
//    vmax/vmin, then select the original 'a' when 'a' is NaN via a mask); the
//    comment is cut mid-sentence ("Res = Mask ?") at this span's end and resumes
//    on the next physical line -- a further diff-flattening artifact.
loadV4(vecS dst, memory mem) %{ 2849 predicate(n->as_LoadVector()->memory_size() == 4); 2850 match(Set dst (LoadVector mem)); 2851 ins_cost(125); 2852 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2853 ins_encode %{ 2854 __ movdl($dst$$XMMRegister, $mem$$Address); 2855 %} 2856 ins_pipe( pipe_slow ); 2857 %} 2858 2859 // Load vectors (4 bytes long) | 1433 case Op_CMoveVD: 1434 if (UseAVX < 1 || UseAVX > 2) 1435 ret_value = false; 1436 break; 1437 case Op_StrIndexOf: 1438 if (!UseSSE42Intrinsics) 1439 ret_value = false; 1440 break; 1441 case Op_StrIndexOfChar: 1442 if (!UseSSE42Intrinsics) 1443 ret_value = false; 1444 break; 1445 case Op_OnSpinWait: 1446 if (VM_Version::supports_on_spin_wait() == false) 1447 ret_value = false; 1448 break; 1449 case Op_MulAddVS2VI: 1450 if (UseSSE < 2) 1451 ret_value = false; 1452 break; 1453 case Op_MaxD: 1454 case Op_MaxF: 1455 case Op_MinD: 1456 case Op_MinF: 1457 if (UseAVX < 1) // enabled for AVX only 1458 ret_value = false; 1459 break; 1460 } 1461 1462 return ret_value; // Per default match rules are supported. 1463 } 1464 1465 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1466 // identify extra cases that we might want to provide match rules for 1467 // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen 1468 bool ret_value = match_rule_supported(opcode); 1469 if (ret_value) { 1470 switch (opcode) { 1471 case Op_AddVB: 1472 case Op_SubVB: 1473 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1474 ret_value = false; 1475 break; 1476 case Op_URShiftVS: 1477 case Op_RShiftVS: 1478 case Op_LShiftVS: 1479 case Op_MulVS: 2827 // a * b + c 2828 instruct fmaD_reg(regD a, regD b, regD c) %{ 2829 predicate(UseFMA); 2830 match(Set c (FmaD c (Binary a b))); 2831 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2832 ins_cost(150); 2833 ins_encode %{ 2834 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2835 %} 2836 ins_pipe( pipe_slow ); 2837 %} 2838 2839 // a * b + c 2840 instruct fmaF_reg(regF a, regF b, regF c) %{ 2841 predicate(UseFMA); 2842 match(Set c (FmaF c (Binary a b))); 2843 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2844 ins_cost(150); 2845 ins_encode %{ 2846 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2847 %} 2848 ins_pipe( pipe_slow ); 2849 %} 2850 2851 // Following pseudo code describes the algorithm for max[FD]/min[FD]: 2852 // if ( b < 0 ) 2853 // swap(a, b) 2854 // Tmp = Max_Float( a , b) 2855 // Mask = a == NaN ? 1 : 0 2856 // Res = Mask ? 
// NOTE(review): Continuation of a flattened side-by-side diff of HotSpot's x86 .ad
// file (original line numbers embedded in the text; not compilable as-is). The
// leading "a : Tmp" is the tail of the pseudocode comment cut off on the previous
// physical line ("Res = Mask ? a : Tmp").
//
// This span holds the RIGHT (post-change) column's new scalar float/double
// max/min instructs, all predicated on UseAVX > 0 and using legacy-encodable
// XMM register classes (legRegF/legRegD) with two TEMP scratch registers:
//
//  - maxF_reg / maxD_reg: match (Set dst (MaxF/MaxD a b)). Per their format
//    strings and encodings, the sequence is: two blendv ops (which, per the
//    pseudocode above, route the operand with the sign bit set into 'b'),
//    vmaxps/vmaxpd into tmp, a cmpps/cmppd of 'a' against itself with
//    predicate 0x3 (labelled "unordered" in the format string -- true only when
//    'a' is NaN), and a final blendv selecting 'a' (the NaN) over tmp under
//    that mask. vector_len = 0, i.e. scalar-width 128-bit operation.
//  - minF_reg / minD_reg: the mirror-image sequence using vminps/vminpd, with
//    the first two blendv ops' operand order swapped relative to max.
//    NOTE(review): min's effect() lists an extra DEF dst that max's does not,
//    and the instructs mutate their 'a'/'b' inputs (movflt/movdbl into them)
//    while effect() declares them only USE -- presumably intentional in the
//    original change, but worth confirming against the upstream .ad file.
//
// After the min/max group: the "VECTOR INSTRUCTIONS" banner and the right
// column's instruct loadV4 (identical in text to the left column's at the top
// of this file), then the span ends at the right column's terminating "|".
a : Tmp 2857 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF mask) %{ 2858 predicate(UseAVX > 0); 2859 match(Set dst (MaxF a b)); 2860 effect(USE a, USE b, TEMP tmp, TEMP mask); 2861 format %{ 2862 "blendvps $tmp,$b,$a,$b \n\t" 2863 "blendvps $a,$a,$b,$b \n\t" 2864 "movaps $b,$tmp \n\t" 2865 "vmaxps $tmp,$a,$b \n\t" 2866 "cmpps.unordered $mask, $a, $a \n\t" 2867 "blendvps $dst,$tmp,$a,$mask \n\t" 2868 %} 2869 ins_encode %{ 2870 int vector_len = 0; 2871 __ blendvps($tmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); 2872 __ blendvps($a$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); 2873 __ movflt($b$$XMMRegister , $tmp$$XMMRegister); 2874 __ vmaxps($tmp$$XMMRegister, $a$$XMMRegister , $b$$XMMRegister); 2875 __ cmpps($mask$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, 0x3, vector_len); 2876 __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $a$$XMMRegister, $mask$$XMMRegister, vector_len); 2877 %} 2878 ins_pipe( pipe_slow ); 2879 %} 2880 2881 // max = java.lang.Max(double a , double b) 2882 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD mask) %{ 2883 predicate(UseAVX > 0); 2884 match(Set dst (MaxD a b)); 2885 effect(USE a, USE b, TEMP tmp, TEMP mask); 2886 format %{ 2887 "blendvpd $tmp,$b,$a,$b \n\t" 2888 "blendvpd $a,$a,$b,$b \n\t" 2889 "movapd $b,$tmp \n\t" 2890 "vmaxpd $tmp,$a,$b \n\t" 2891 "cmppd.unordered $mask, $a, $a \n\t" 2892 "blendvpd $dst,$tmp,$a,$mask \n\t" 2893 %} 2894 ins_encode %{ 2895 int vector_len = 0; 2896 __ blendvpd($tmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); 2897 __ blendvpd($a$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); 2898 __ movdbl($b$$XMMRegister , $tmp$$XMMRegister); 2899 __ vmaxpd($tmp$$XMMRegister, $a$$XMMRegister , $b$$XMMRegister); 2900 __ cmppd($mask$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, 0x3, vector_len); 2901 __ 
blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $a$$XMMRegister, $mask$$XMMRegister, vector_len); 2902 %} 2903 ins_pipe( pipe_slow ); 2904 %} 2905 2906 2907 // min = java.lang.Min(float a , float b) 2908 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF mask) %{ 2909 predicate(UseAVX > 0); 2910 match(Set dst (MinF a b)); 2911 effect(USE a, USE b, TEMP tmp, TEMP mask, DEF dst); 2912 format %{ 2913 "blendvps $tmp,$a,$b,$a \n\t" 2914 "blendvps $b,$b,$a,$a \n\t" 2915 "movaps $a,$tmp \n\t" 2916 "vminps $tmp,$a,$b \n\t" 2917 "cmpps.unordered $mask, $a, $a \n\t" 2918 "blendvps $dst,$tmp,$a,$mask \n\t" 2919 %} 2920 ins_encode %{ 2921 int vector_len = 0; 2922 __ blendvps($tmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); 2923 __ blendvps($b$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); 2924 __ movflt($a$$XMMRegister , $tmp$$XMMRegister); 2925 __ vminps($tmp$$XMMRegister, $a$$XMMRegister , $b$$XMMRegister); 2926 __ cmpps($mask$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, 0x3, vector_len); 2927 __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $a$$XMMRegister, $mask$$XMMRegister, vector_len); 2928 %} 2929 ins_pipe( pipe_slow ); 2930 %} 2931 2932 // min = java.lang.Min(double a , double b) 2933 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD mask) %{ 2934 predicate(UseAVX > 0); 2935 match(Set dst (MinD a b)); 2936 effect(USE a, USE b, TEMP tmp, TEMP mask, DEF dst); 2937 format %{ 2938 "blendvpd $tmp,$a,$b,$a \n\t" 2939 "blendvpd $b,$b,$a,$a \n\t" 2940 "movapd $a,$tmp \n\t" 2941 "vminpd $tmp,$a,$b \n\t" 2942 "cmppd.unordered $mask, $a, $a \n\t" 2943 "blendvpd $dst,$tmp,$a,$mask \n\t" 2944 %} 2945 ins_encode %{ 2946 int vector_len = 0; 2947 __ blendvpd($tmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); 2948 __ blendvpd($b$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); 2949 __ movdbl($a$$XMMRegister , 
$tmp$$XMMRegister); 2950 __ vminpd($tmp$$XMMRegister, $a$$XMMRegister , $b$$XMMRegister); 2951 __ cmppd($mask$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, 0x3, vector_len); 2952 __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $a$$XMMRegister, $mask$$XMMRegister, vector_len); 2953 %} 2954 ins_pipe( pipe_slow ); 2955 %} 2956 2957 // ====================VECTOR INSTRUCTIONS===================================== 2958 2959 2960 // Load vectors (4 bytes long) 2961 instruct loadV4(vecS dst, memory mem) %{ 2962 predicate(n->as_LoadVector()->memory_size() == 4); 2963 match(Set dst (LoadVector mem)); 2964 ins_cost(125); 2965 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2966 ins_encode %{ 2967 __ movdl($dst$$XMMRegister, $mem$$Address); 2968 %} 2969 ins_pipe( pipe_slow ); 2970 %} 2971 2972 // Load vectors (4 bytes long) |