--- old/src/hotspot/cpu/x86/x86.ad 2019-02-07 17:43:07.287095630 +0530
+++ new/src/hotspot/cpu/x86/x86.ad 2019-02-07 17:43:07.095095626 +0530
@@ -1450,6 +1450,13 @@
       if (UseSSE < 2)
         ret_value = false;
       break;
+    case Op_MaxD:
+    case Op_MaxF:
+    case Op_MinD:
+    case Op_MinF:
+      if (UseAVX < 1) // the max/min [FD] match rules added by this patch are predicated on UseAVX > 0
+        ret_value = false;
+      break;
   }
 
   return ret_value; // Per default match rules are supported.
@@ -2838,6 +2845,105 @@
   %}
   ins_pipe( pipe_slow );
 %}
+
+// max = java.lang.Math.max(float a, float b). Pseudo code for max[FD]:
+//   btmp = (b < 0) ? a : b
+//   atmp = (b < 0) ? b : a
+//   tmp  = Max_Float(atmp, btmp)
+//   dst  = (atmp == NaN) ? atmp : tmp
+// min[FD] follows the same steps, but passes $a (not $b) as the blend selector and uses vminp[sd].
+instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MaxF a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvps $btmp,$b,$a,$b \n\t"
+     "blendvps $atmp,$a,$b,$b \n\t"
+     "vmaxps $tmp,$atmp,$btmp \n\t"
+     "cmpps.unordered $btmp, $atmp, $atmp \n\t"
+     "blendvps $dst,$tmp,$atmp,$btmp \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0; // NOTE(review): presumably the 128-bit vector length encoding -- confirm against the assembler
+    __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ vmaxps($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len);
+    __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len); // 0x3 = "unordered": $btmp becomes the "atmp is NaN" mask
+    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// max = java.lang.Math.max(double a, double b)
+// Same sequence as maxF_reg above, using the ...pd instruction forms and legRegD operands.
+instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MaxD a b));
+  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
+  format %{
+     "blendvpd $btmp,$b,$a,$b \n\t"
+     "blendvpd $atmp,$a,$b,$b \n\t"
+     "vmaxpd $tmp,$atmp,$btmp \n\t"
+     "cmppd.unordered $btmp, $atmp, $atmp \n\t"
+     "blendvpd $dst,$tmp,$atmp,$btmp \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0;
+    __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ vmaxpd($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len);
+    __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len); // 0x3 = "unordered": $btmp becomes the "atmp is NaN" mask
+    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(float a, float b)
+//   atmp = (a < 0) ? b : a;  btmp = (a < 0) ? a : b;  tmp = Min_Float(atmp, btmp);  dst = (atmp == NaN) ? atmp : tmp
+instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MinF a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvps $atmp,$a,$b,$a \n\t"
+     "blendvps $btmp,$b,$a,$a \n\t"
+     "vminps $tmp,$atmp,$btmp \n\t"
+     "cmpps.unordered $btmp, $atmp, $atmp \n\t"
+     "blendvps $dst,$tmp,$atmp,$btmp \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0;
+    __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ vminps($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len);
+    __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len); // 0x3 = "unordered": $btmp becomes the "atmp is NaN" mask
+    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(double a, double b); same sequence as minF_reg above, using the ...pd instruction forms.
+instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MinD a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvpd $atmp,$a,$b,$a \n\t"
+     "blendvpd $btmp,$b,$a,$a \n\t"
+     "vminpd $tmp,$atmp,$btmp \n\t"
+     "cmppd.unordered $btmp, $atmp, $atmp \n\t"
+     "blendvpd $dst,$tmp,$atmp,$btmp \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0;
+    __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ vminpd($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len);
+    __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len); // 0x3 = "unordered": $btmp becomes the "atmp is NaN" mask
+    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 // ====================VECTOR INSTRUCTIONS=====================================
 