--- old/src/hotspot/cpu/x86/assembler_x86.cpp 2019-01-28 14:04:52.138283318 +0530
+++ new/src/hotspot/cpu/x86/assembler_x86.cpp 2019-01-28 14:04:51.962283313 +0530
@@ -7775,6 +7775,40 @@
   emit_int8((unsigned char)(0xF & cop));
 }
 
+void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_rex_vex_w_reverted();
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5D);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_rex_vex_w_reverted();
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5D);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
--- old/src/hotspot/cpu/x86/assembler_x86.hpp 2019-01-28 14:04:52.494283326 +0530
+++ new/src/hotspot/cpu/x86/assembler_x86.hpp 2019-01-28 14:04:52.318283322 +0530
@@ -1934,6 +1934,11 @@
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
+  void vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vminps(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+
   void shlxl(Register dst, Register src1, Register src2);
   void shlxq(Register dst, Register src1, Register src2);
 
--- old/src/hotspot/cpu/x86/x86.ad 2019-01-28 14:04:52.966283337 +0530
+++ new/src/hotspot/cpu/x86/x86.ad 2019-01-28 14:04:52.790283333 +0530
@@ -1450,6 +1450,13 @@
       if (UseSSE < 2)
         ret_value = false;
       break;
+    case Op_MaxD:
+    case Op_MaxF:
+    case Op_MinD:
+    case Op_MinF:
+      if (UseAVX < 1) // enabled for AVX only
+        ret_value = false;
+      break;
   }
 
   return ret_value;  // Per default match rules are supported.
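Note: the Op_MaxD/Op_MaxF/Op_MinD/Op_MinF match rules above are only enabled when UseAVX >= 1; on non-AVX CPUs C2 keeps using the regular Math.min/max code path. As a rough illustration of the Java shapes that produce these ideal nodes once the JDK-8212043 float/double min/max intrinsics are in effect (the class and method names below are invented for the example):

    // Illustrative sketch only; calls like these are what become MinF/MaxF/MinD/MaxD nodes.
    public class FpMinMaxShapes {
        static float clampF(float v, float lo, float hi) {
            return Math.max(lo, Math.min(v, hi));   // MinF feeding MaxF
        }
        static double clampD(double v, double lo, double hi) {
            return Math.max(lo, Math.min(v, hi));   // MinD feeding MaxD
        }
        public static void main(String[] args) {
            System.out.println(clampF(2.5f, 0.0f, 1.0f)); // 1.0
            System.out.println(clampD(-0.0, 0.0, 1.0));   // 0.0 (Math.max(0.0, -0.0) is +0.0)
        }
    }
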
@@ -2840,6 +2847,112 @@
   %}
   ins_pipe( pipe_slow );
 %}
+
+// The following pseudo code describes the algorithm for max[FD]/min[FD]
+// (shown for max; the min flavors mirror it with the sign test on a):
+//   if ( b < 0 )
+//     swap(a, b)
+//   Tmp  = Max_Float(a, b)
+//   Mask = (a == NaN) ? 1 : 0
+//   Res  = Mask ? a : Tmp
+instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF mask) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MaxF a b));
+  effect(USE a, USE b, TEMP tmp, TEMP mask);
+  format %{
+     "blendvps         $tmp,$b,$a,$b       \n\t"
+     "blendvps         $a,$a,$b,$b         \n\t"
+     "movaps           $b,$tmp             \n\t"
+     "vmaxps           $tmp,$a,$b          \n\t"
+     "cmpps.unordered  $mask,$a,$a         \n\t"
+     "blendvps         $dst,$tmp,$a,$mask  \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0;
+    __ blendvps($tmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ blendvps($a$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ movflt($b$$XMMRegister, $tmp$$XMMRegister);
+    __ vmaxps($tmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister);
+    __ cmpps($mask$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, 0x3, vector_len);
+    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $a$$XMMRegister, $mask$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// max = java.lang.Math.max(double a, double b)
+instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD mask) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MaxD a b));
+  effect(USE a, USE b, TEMP tmp, TEMP mask);
+  format %{
+     "blendvpd         $tmp,$b,$a,$b       \n\t"
+     "blendvpd         $a,$a,$b,$b         \n\t"
+     "movapd           $b,$tmp             \n\t"
+     "vmaxpd           $tmp,$a,$b          \n\t"
+     "cmppd.unordered  $mask,$a,$a         \n\t"
+     "blendvpd         $dst,$tmp,$a,$mask  \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0;
+    __ blendvpd($tmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ blendvpd($a$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ movdbl($b$$XMMRegister, $tmp$$XMMRegister);
+    __ vmaxpd($tmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister);
+    __ cmppd($mask$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, 0x3, vector_len);
+    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $a$$XMMRegister, $mask$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(float a, float b)
+instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF mask) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MinF a b));
+  effect(USE a, USE b, TEMP tmp, TEMP mask, DEF dst);
+  format %{
+     "blendvps         $tmp,$a,$b,$a       \n\t"
+     "blendvps         $b,$b,$a,$a         \n\t"
+     "movaps           $a,$tmp             \n\t"
+     "vminps           $tmp,$a,$b          \n\t"
+     "cmpps.unordered  $mask,$a,$a         \n\t"
+     "blendvps         $dst,$tmp,$a,$mask  \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0;
+    __ blendvps($tmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ blendvps($b$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ movflt($a$$XMMRegister, $tmp$$XMMRegister);
+    __ vminps($tmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister);
+    __ cmpps($mask$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, 0x3, vector_len);
+    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $a$$XMMRegister, $mask$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(double a, double b)
+instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD mask) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MinD a b));
+  effect(USE a, USE b, TEMP tmp, TEMP mask, DEF dst);
+  format %{
+     "blendvpd         $tmp,$a,$b,$a       \n\t"
+     "blendvpd         $b,$b,$a,$a         \n\t"
+     "movapd           $a,$tmp             \n\t"
+     "vminpd           $tmp,$a,$b          \n\t"
+     "cmppd.unordered  $mask,$a,$a         \n\t"
+     "blendvpd         $dst,$tmp,$a,$mask  \n\t"
+  %}
+  ins_encode %{
+    int vector_len = 0;
+    __ blendvpd($tmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ blendvpd($b$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ movdbl($a$$XMMRegister, $tmp$$XMMRegister);
+    __ vminpd($tmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister);
+    __ cmppd($mask$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, 0x3, vector_len);
+    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $a$$XMMRegister, $mask$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 // ====================VECTOR INSTRUCTIONS=====================================
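The pseudo code above compresses what each of the four instructs does. A scalar Java model of the max flavor may make the blend/max/blend sequence easier to follow; this is only a sketch of the semantics (the helper names are invented), not the code C2 emits:

    // Scalar model of maxF_reg: swap on the sign of b, hardware-style max, then a NaN fix-up.
    public class MaxFModel {
        // One lane of x86 vmaxps: if a > b return a, otherwise return b, so NaN operands
        // and +/-0.0 ties fall through to the second operand b.
        static float vmaxps(float a, float b) {
            return (a > b) ? a : b;
        }
        static float maxF(float a, float b) {
            if (Float.floatToRawIntBits(b) < 0) {    // blendvps pair + movaps: swap if b's sign bit is set
                float t = a; a = b; b = t;
            }
            float tmp = vmaxps(a, b);                // vmaxps  $tmp,$a,$b
            boolean aIsNaN = (a != a);               // cmpps.unordered  $mask,$a,$a
            return aIsNaN ? a : tmp;                 // final blendvps: keep a when it is NaN
        }
        public static void main(String[] args) {
            System.out.println(maxF(-0.0f, 0.0f));      // 0.0 (matches Math.max)
            System.out.println(maxF(1.0f, Float.NaN));  // NaN
            System.out.println(maxF(Float.NaN, 1.0f));  // NaN
        }
    }

The min instructs do the same dance with the sign test on a and vminps/vminpd in the middle.
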
--- old/src/hotspot/cpu/x86/x86_32.ad 2019-01-28 14:04:53.458283349 +0530
+++ new/src/hotspot/cpu/x86/x86_32.ad 2019-01-28 14:04:53.282283345 +0530
@@ -4376,6 +4376,11 @@
 
 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 
+
+opclass legRegF (regF);
+opclass legRegD (regD);
+
+
 //----------Special Memory Operands--------------------------------------------
 // Stack Slot Operand - This operand is used for loading and storing temporary
 
--- old/src/hotspot/cpu/x86/x86_64.ad 2019-01-28 14:04:53.822283358 +0530
+++ new/src/hotspot/cpu/x86/x86_64.ad 2019-01-28 14:04:53.646283353 +0530
@@ -3663,6 +3663,16 @@
   interface(REG_INTER);
 %}
 
+
+// Float register operands
+operand legRegF() %{
+  constraint(ALLOC_IN_RC(float_reg_legacy));
+  match(RegF);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Float register operands
 operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);
 
@@ -3682,6 +3692,15 @@
 %}
 
 // Double register operands
+operand legRegD() %{
+  constraint(ALLOC_IN_RC(double_reg_legacy));
+  match(RegD);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Double register operands
 operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);
 
@@ -5409,6 +5428,7 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
+
 // Load Float
 instruct MoveF2VL(vlRegF dst, regF src) %{
   match(Set dst src);
 
@@ -5420,6 +5440,16 @@
 %}
 
 // Load Float
+instruct MoveF2LEG(legRegF dst, regF src) %{
+  match(Set dst src);
+  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load Float
 instruct MoveVL2F(regF dst, vlRegF src) %{
   match(Set dst src);
   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 
@@ -5429,6 +5459,16 @@
   ins_pipe( fpu_reg_reg );
 %}
 
+// Load Float
+instruct MoveLEG2F(regF dst, legRegF src) %{
+  match(Set dst src);
+  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
 // Load Double
 instruct loadD_partial(regD dst, memory mem) %{
 
@@ -5443,6 +5483,7 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
+
 instruct loadD(regD dst, memory mem) %{
   predicate(UseXmmLoadAndClearUpper);
 
@@ -5467,10 +5508,30 @@
 %}
 
 // Load Double
+instruct MoveD2LEG(legRegD dst, regD src) %{
+  match(Set dst src);
+  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load Double
 instruct MoveVL2D(regD dst, vlRegD src) %{
   match(Set dst src);
   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
   ins_encode %{
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load Double
+instruct MoveLEG2D(regD dst, legRegD src) %{
+  match(Set dst src);
+  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
+  ins_encode %{
     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
   %}
   ins_pipe( fpu_reg_reg );
--- /dev/null 2019-01-21 08:42:35.435473376 +0530
+++ new/test/hotspot/jtreg/compiler/intrinsics/math/TestFpMinMaxIntrinsics.java 2019-01-28 14:04:54.006283362 +0530
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8212043
+ * @summary Test compiler intrinsics of floating-point Math.min/max
+ *
+ * @run main/othervm -Xint compiler.intrinsics.math.TestFpMinMaxIntrinsics
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions
+ *      -Xcomp -XX:TieredStopAtLevel=1
+ *      -XX:CompileOnly=java/lang/Math
+ *      compiler.intrinsics.math.TestFpMinMaxIntrinsics
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions
+ *      -Xcomp -XX:-TieredCompilation
+ *      -XX:CompileOnly=java/lang/Math
+ *      compiler.intrinsics.math.TestFpMinMaxIntrinsics
+ */
+
+package compiler.intrinsics.math;
+
+import java.util.Arrays;
+
+public class TestFpMinMaxIntrinsics {
+
+    private static final float fPos     = 15280.0f;
+    private static final float fNeg     = -55555.5f;
+    private static final float fPosZero = 0.0f;
+    private static final float fNegZero = -0.0f;
+    private static final float fPosInf  = Float.POSITIVE_INFINITY;
+    private static final float fNegInf  = Float.NEGATIVE_INFINITY;
+    private static final float fNaN     = Float.NaN;
+
+    private static final double dPos     = 482390926662501720.0;
+    private static final double dNeg     = -333333333333333333.3;
+    private static final double dPosZero = 0.0;
+    private static final double dNegZero = -0.0;
+    private static final double dPosInf  = Double.POSITIVE_INFINITY;
+    private static final double dNegInf  = Double.NEGATIVE_INFINITY;
+    private static final double dNaN     = Double.NaN;
+
+    private static final float[][] f_cases = {
+        //  a          b          min        max
+        {   fPos,      fPos,      fPos,      fPos     },
+        {   fPos,      fNeg,      fNeg,      fPos     },
+        {   fPosZero,  fNegZero,  fNegZero,  fPosZero },
+        {   fNegZero,  fNegZero,  fNegZero,  fNegZero },
+        {   fPos,      fPosInf,   fPos,      fPosInf  },
+        {   fNeg,      fNegInf,   fNegInf,   fNeg     },
+        {   fPos,      fNaN,      fNaN,      fNaN     },
+        {   fNegInf,   fNaN,      fNaN,      fNaN     },
+    };
+
+    private static final double[][] d_cases = {
+        //  a          b          min        max
+        {   dPos,      dPos,      dPos,      dPos     },
+        {   dPos,      dNeg,      dNeg,      dPos     },
+        {   dPosZero,  dNegZero,  dNegZero,  dPosZero },
+        {   dNegZero,  dNegZero,  dNegZero,  dNegZero },
+        {   dPos,      dPosInf,   dPos,      dPosInf  },
+        {   dNeg,      dNegInf,   dNegInf,   dNeg     },
+        {   dPos,      dNaN,      dNaN,      dNaN     },
+        {   dNegInf,   dNaN,      dNaN,      dNaN     },
+    };
+
+    private static void fTest(float[] row) {
+        float min = Math.min(row[0], row[1]);
+        float max = Math.max(row[0], row[1]);
+        if (Float.isNaN(min) && Float.isNaN(max)
+                && Float.isNaN(row[2]) && Float.isNaN(row[3])) {
+            // Return if all of them are NaN
+            return;
+        }
+        if (min != row[2] || max != row[3]) {
+            throw new AssertionError("Unexpected result of float min/max: "
+                    + "a = " + row[0] + ", b = " + row[1] + ", "
+                    + "result = (" + min + ", " + max + "), "
+                    + "expected = (" + row[2] + ", " + row[3] + ")");
+        }
+    }
+
+    private static void dTest(double[] row) {
+        double min = Math.min(row[0], row[1]);
+        double max = Math.max(row[0], row[1]);
+        if (Double.isNaN(min) && Double.isNaN(max)
+                && Double.isNaN(row[2]) && Double.isNaN(row[3])) {
+            // Return if all of them are NaN
+            return;
+        }
+        if (min != row[2] || max != row[3]) {
+            throw new AssertionError("Unexpected result of double min/max: "
+                    + "a = " + row[0] + ", b = " + row[1] + ", "
+                    + "result = (" + min + ", " + max + "), "
+                    + "expected = (" + row[2] + ", " + row[3] + ")");
+        }
+    }
+
+    public static void main(String[] args) {
+        Arrays.stream(f_cases).forEach(TestFpMinMaxIntrinsics::fTest);
+        Arrays.stream(d_cases).forEach(TestFpMinMaxIntrinsics::dTest);
+        System.out.println("PASS");
+    }
+}
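A side note on the comparisons in fTest/dTest: Java's != treats -0.0 and +0.0 as equal, so the signed-zero rows in the tables pass whichever zero min/max actually return. If a stricter sign-of-zero check were ever wanted, it would have to go through the raw bit pattern; a small illustrative sketch (class and method names invented here):

    // Illustrative only: == cannot distinguish the sign of zero, raw bits can.
    public class SignedZeroCheck {
        static boolean identical(float x, float y) {
            return Float.floatToRawIntBits(x) == Float.floatToRawIntBits(y);
        }
        public static void main(String[] args) {
            float min = Math.min(0.0f, -0.0f);          // the spec says this is -0.0f
            System.out.println(min == -0.0f);           // true, but would also be true for +0.0f
            System.out.println(identical(min, -0.0f));  // true only if min really is -0.0f
        }
    }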