--- old/src/hotspot/cpu/x86/x86.ad 2019-01-28 14:04:52.966283337 +0530
+++ new/src/hotspot/cpu/x86/x86.ad 2019-01-28 14:04:52.790283333 +0530
@@ -1450,6 +1450,13 @@
       if (UseSSE < 2)
         ret_value = false;
       break;
+    case Op_MaxD:
+    case Op_MaxF:
+    case Op_MinD:
+    case Op_MinF:
+      if (UseAVX < 1) // the min/max rules are predicated on UseAVX > 0
+        ret_value = false;
+      break;
   }
 
   return ret_value;  // Per default match rules are supported.
@@ -2840,6 +2847,118 @@
   %}
   ins_pipe( pipe_slow );
 %}
+
+// Math.max/Math.min for float and double.
+// Java requires NaN when either input is NaN and orders -0.0 below +0.0,
+// whereas vmaxp[sd]/vminp[sd] hand back their second source operand when an
+// input is NaN or both inputs are zero.  Each rule below therefore does:
+//   max: if (signbit(b)) swap(a, b)     min: if (signbit(a)) swap(a, b)
+//   Tmp  = Max_Float(a, b)              (Min_Float for min)
+//   Mask = a == NaN ? 1 : 0
+//   Res  = Mask ? a : Tmp
+// $a and $b are declared USE only, so the swapped pair is built in the
+// $atmp/$btmp temporaries instead of overwriting the inputs; $btmp is then
+// reused to hold Mask.
+
+// max = java.lang.Math.max(float a, float b)
+instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MaxF a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvps         $btmp,$b,$a,$b           \n\t"
+     "blendvps         $atmp,$a,$b,$b           \n\t"
+     "vmaxps           $tmp,$atmp,$btmp         \n\t"
+     "cmpps.unordered  $btmp,$atmp,$atmp        \n\t"
+     "blendvps         $dst,$tmp,$atmp,$btmp    \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0; // presumably Assembler::AVX_128bit -- confirm
+    // (atmp, btmp) = (a, b), swapped when the sign bit of b is set.
+    __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ vmaxps($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
+    // 0x3 is the "unordered" predicate: btmp becomes the NaN mask of atmp.
+    __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len);
+    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// max = java.lang.Math.max(double a, double b)
+instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MaxD a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvpd         $btmp,$b,$a,$b           \n\t"
+     "blendvpd         $atmp,$a,$b,$b           \n\t"
+     "vmaxpd           $tmp,$atmp,$btmp         \n\t"
+     "cmppd.unordered  $btmp,$atmp,$atmp        \n\t"
+     "blendvpd         $dst,$tmp,$atmp,$btmp    \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0; // presumably Assembler::AVX_128bit -- confirm
+    // (atmp, btmp) = (a, b), swapped when the sign bit of b is set.
+    __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ vmaxpd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
+    // 0x3 is the "unordered" predicate: btmp becomes the NaN mask of atmp.
+    __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len);
+    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(float a, float b)
+instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MinF a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvps         $atmp,$a,$b,$a           \n\t"
+     "blendvps         $btmp,$b,$a,$a           \n\t"
+     "vminps           $tmp,$atmp,$btmp         \n\t"
+     "cmpps.unordered  $btmp,$atmp,$atmp        \n\t"
+     "blendvps         $dst,$tmp,$atmp,$btmp    \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0; // presumably Assembler::AVX_128bit -- confirm
+    // (atmp, btmp) = (a, b), swapped when the sign bit of a is set.
+    __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ vminps($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
+    // 0x3 is the "unordered" predicate: btmp becomes the NaN mask of atmp.
+    __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len);
+    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(double a, double b)
+instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MinD a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvpd         $atmp,$a,$b,$a           \n\t"
+     "blendvpd         $btmp,$b,$a,$a           \n\t"
+     "vminpd           $tmp,$atmp,$btmp         \n\t"
+     "cmppd.unordered  $btmp,$atmp,$atmp        \n\t"
+     "blendvpd         $dst,$tmp,$atmp,$btmp    \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0; // presumably Assembler::AVX_128bit -- confirm
+    // (atmp, btmp) = (a, b), swapped when the sign bit of a is set.
+    __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ vminpd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
+    // 0x3 is the "unordered" predicate: btmp becomes the NaN mask of atmp.
+    __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len);
+    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 // ====================VECTOR INSTRUCTIONS=====================================
 