--- old/src/hotspot/cpu/x86/assembler_x86.cpp	2019-03-06 22:35:23.649133366 +0100
+++ new/src/hotspot/cpu/x86/assembler_x86.cpp	2019-03-06 22:35:23.439133462 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -7765,9 +7765,43 @@
   }
 }
 
+void Assembler::vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_rex_vex_w_reverted();
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5D);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_rex_vex_w_reverted();
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5D);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  assert(!VM_Version::supports_evex(), "");
+  assert(vector_len <= AVX_256bit, "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC2);
@@ -7777,7 +7811,7 @@
 
 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  assert(!VM_Version::supports_evex(), "");
+  assert(vector_len <= AVX_256bit, "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x4B);
@@ -7788,7 +7822,7 @@
 
 void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  assert(!VM_Version::supports_evex(), "");
+  assert(vector_len <= AVX_256bit, "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC2);
@@ -7798,7 +7832,7 @@
 
 void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  assert(!VM_Version::supports_evex(), "");
+  assert(vector_len <= AVX_256bit, "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x4A);
--- old/src/hotspot/cpu/x86/assembler_x86.hpp	2019-03-06 22:35:24.150133136 +0100
+++ new/src/hotspot/cpu/x86/assembler_x86.hpp	2019-03-06 22:35:23.943133231 +0100
@@ -1934,6 +1934,11 @@
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
+  void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+
   void shlxl(Register dst, Register src1, Register src2);
   void shlxq(Register dst, Register src1, Register src2);
--- old/src/hotspot/cpu/x86/macroAssembler_x86.hpp	2019-03-06 22:35:24.615132922 +0100
+++ new/src/hotspot/cpu/x86/macroAssembler_x86.hpp	2019-03-06 22:35:24.405133019 +0100
@@ -165,6 +165,7 @@
 
   // Support optimal SSE move instructions.
   void movflt(XMMRegister dst, XMMRegister src) {
+    if (dst->encoding() == src->encoding()) return;
     if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
     else                       { movss (dst, src); return; }
   }
@@ -173,6 +174,7 @@
   void movflt(Address dst, XMMRegister src) { movss(dst, src); }
 
   void movdbl(XMMRegister dst, XMMRegister src) {
+    if (dst->encoding() == src->encoding()) return;
     if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
     else                       { movsd (dst, src); return; }
   }
--- old/src/hotspot/cpu/x86/x86.ad	2019-03-06 22:35:25.075132711 +0100
+++ new/src/hotspot/cpu/x86/x86.ad	2019-03-06 22:35:24.864132808 +0100
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -1450,6 +1450,15 @@
       if (UseSSE < 2)
         ret_value = false;
       break;
+#ifdef _LP64
+    case Op_MaxD:
+    case Op_MaxF:
+    case Op_MinD:
+    case Op_MinF:
+      if (UseAVX < 1) // enabled for AVX only
+        ret_value = false;
+      break;
+#endif
   }
 
   return ret_value;  // Per default match rules are supported.
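
Background on the new Min/Max match rules above: java.lang.Math.min/max is stricter than raw SSE minss/maxss for signed zeros and NaNs, which is why the AVX blend/compare sequences in x86_64.ad below are needed at all. A minimal Java illustration of the required scalar semantics (the same expectations are encoded in the f_cases/d_cases tables of the updated test):

    public class MinMaxSemantics {
        public static void main(String[] args) {
            // +0.0 is treated as strictly greater than -0.0:
            System.out.println(Math.max(0.0f, -0.0f));      // 0.0
            System.out.println(Math.min(0.0f, -0.0f));      // -0.0
            // NaN propagates regardless of operand order:
            System.out.println(Math.max(1.0f, Float.NaN));  // NaN
            System.out.println(Math.min(Float.NaN, 1.0f));  // NaN
            // Raw maxss instead returns its *second* operand whenever either
            // operand is NaN, so a single instruction cannot implement this.
        }
    }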
--- old/src/hotspot/cpu/x86/x86_64.ad	2019-03-06 22:35:25.577132481 +0100
+++ new/src/hotspot/cpu/x86/x86_64.ad	2019-03-06 22:35:25.365132578 +0100
@@ -698,6 +698,87 @@
   __ bind(done);
 }
 
+// Math.min()    # Math.max()
+// --------------------------
+// ucomis[s/d]   #
+//   ja   -> b   # a
+//   jp   -> NaN # NaN
+//   jb   -> a   # b
+//   je            #
+//   |-jz -> a | b # a & b
+//   |    -> a     #
+void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
+                     XMMRegister a, XMMRegister b,
+                     XMMRegister xmmt, Register rt,
+                     bool min, bool single) {
+
+  Label nan, zero, below, above, done;
+
+  if (single)
+    __ ucomiss(a, b);
+  else
+    __ ucomisd(a, b);
+
+  if (dst->encoding() != (min ? b : a)->encoding())
+    __ jccb(Assembler::above, above); // CF=0 & ZF=0
+  else
+    __ jccb(Assembler::above, done);
+
+  __ jccb(Assembler::parity, nan);  // PF=1
+  __ jccb(Assembler::below, below); // CF=1
+
+  // equal
+  __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
+  if (single) {
+    __ ucomiss(a, xmmt);
+    __ jccb(Assembler::equal, zero);
+
+    __ movflt(dst, a);
+    __ jmp(done);
+  }
+  else {
+    __ ucomisd(a, xmmt);
+    __ jccb(Assembler::equal, zero);
+
+    __ movdbl(dst, a);
+    __ jmp(done);
+  }
+
+  __ bind(zero);
+  if (min)
+    __ vpor(dst, a, b, Assembler::AVX_128bit);
+  else
+    __ vpand(dst, a, b, Assembler::AVX_128bit);
+
+  __ jmp(done);
+
+  __ bind(above);
+  if (single)
+    __ movflt(dst, min ? b : a);
+  else
+    __ movdbl(dst, min ? b : a);
+
+  __ jmp(done);
+
+  __ bind(nan);
+  if (single) {
+    __ movl(rt, 0x7fc00000); // Float.NaN
+    __ movdl(dst, rt);
+  }
+  else {
+    __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
+    __ movdq(dst, rt);
+  }
+  __ jmp(done);
+
+  __ bind(below);
+  if (single)
+    __ movflt(dst, min ? a : b);
+  else
+    __ movdbl(dst, min ? a : b);
+
+  __ bind(done);
+}
 
 //=============================================================================
 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
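
Aside (illustration, not part of the patch): the branch table above can be read as the following scalar Java model of emit_fp_min_max for the (min, single) case; minBranchy is a hypothetical name used only here:

    public class FpMinBranchModel {
        static float minBranchy(float a, float b) {
            if (Float.isNaN(a) || Float.isNaN(b)) return Float.NaN; // jp -> nan
            if (a > b) return b;                                    // ja -> b for min
            if (a < b) return a;                                    // jb -> a for min
            if (a != 0.0f) return a;                                // equal, non-zero
            // Both operands are zeros: OR of the bit patterns (vpor) yields
            // -0.0 if either input is -0.0; max uses AND (vpand) instead.
            return Float.intBitsToFloat(Float.floatToRawIntBits(a)
                                      | Float.floatToRawIntBits(b));
        }
        public static void main(String[] args) {
            System.out.println(minBranchy(0.0f, -0.0f)); // -0.0
            System.out.println(minBranchy(1.0f, 2.0f));  // 1.0
        }
    }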
@@ -3548,6 +3629,15 @@
 %}
 
 // Float register operands
+operand legRegF() %{
+  constraint(ALLOC_IN_RC(float_reg_legacy));
+  match(RegF);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Float register operands
 operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);
@@ -3566,6 +3656,15 @@
 %}
 
 // Double register operands
+operand legRegD() %{
+  constraint(ALLOC_IN_RC(double_reg_legacy));
+  match(RegD);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Double register operands
 operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);
@@ -5304,6 +5403,16 @@
 %}
 
 // Load Float
+instruct MoveF2LEG(legRegF dst, regF src) %{
+  match(Set dst src);
+  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load Float
 instruct MoveVL2F(regF dst, vlRegF src) %{
   match(Set dst src);
   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
@@ -5313,6 +5422,16 @@
   ins_pipe( fpu_reg_reg );
 %}
 
+// Load Float
+instruct MoveLEG2F(regF dst, legRegF src) %{
+  match(Set dst src);
+  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
 // Load Double
 instruct loadD_partial(regD dst, memory mem)
 %{
@@ -5351,6 +5470,16 @@
 %}
 
 // Load Double
+instruct MoveD2LEG(legRegD dst, regD src) %{
+  match(Set dst src);
+  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load Double
 instruct MoveVL2D(regD dst, vlRegD src) %{
   match(Set dst src);
   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
@@ -5360,6 +5489,167 @@
   ins_pipe( fpu_reg_reg );
 %}
 
+// Load Double
+instruct MoveLEG2D(regD dst, legRegD src) %{
+  match(Set dst src);
+  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Following pseudo code describes the algorithm for max[FD]:
+// Min algorithm is on similar lines
+//  btmp = (b < +0.0) ? a : b
+//  atmp = (b < +0.0) ? b : a
+//  Tmp  = Max_Float(atmp, btmp)
+//  Res  = (atmp == NaN) ? atmp : Tmp
+
+// max = java.lang.Math.max(float a, float b)
+instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
+  predicate(UseAVX > 0 && !n->is_reduction());
+  match(Set dst (MaxF a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvps         $btmp,$b,$a,$b        \n\t"
+     "blendvps         $atmp,$a,$b,$b        \n\t"
+     "vmaxss           $tmp,$atmp,$btmp      \n\t"
+     "cmpps.unordered  $btmp,$atmp,$atmp     \n\t"
+     "blendvps         $dst,$tmp,$atmp,$btmp \n\t"
+  %}
+  ins_encode %{
+    int vector_len = Assembler::AVX_128bit;
+    __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
+    __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
+    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct maxF_reduction_reg(regF dst, regF a, regF b, regF xmmt, rRegI tmp, rFlagsReg cr) %{
+  predicate(UseAVX > 0 && n->is_reduction());
+  match(Set dst (MaxF a b));
+  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
+
+  format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
+  ins_encode %{
+    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
+                    false /*min*/, true /*single*/);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// max = java.lang.Math.max(double a, double b)
+instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
+  predicate(UseAVX > 0 && !n->is_reduction());
+  match(Set dst (MaxD a b));
+  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
+  format %{
+     "blendvpd         $btmp,$b,$a,$b        \n\t"
+     "blendvpd         $atmp,$a,$b,$b        \n\t"
+     "vmaxsd           $tmp,$atmp,$btmp      \n\t"
+     "cmppd.unordered  $btmp,$atmp,$atmp     \n\t"
+     "blendvpd         $dst,$tmp,$atmp,$btmp \n\t"
+  %}
+  ins_encode %{
+    int vector_len = Assembler::AVX_128bit;
+    __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+    __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
+    __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
+    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct maxD_reduction_reg(regD dst, regD a, regD b, regD xmmt, rRegL tmp, rFlagsReg cr) %{
+  predicate(UseAVX > 0 && n->is_reduction());
+  match(Set dst (MaxD a b));
+  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
+
+  format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
+  ins_encode %{
+    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
+                    false /*min*/, false /*single*/);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(float a, float b)
+instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
+  predicate(UseAVX > 0 && !n->is_reduction());
+  match(Set dst (MinF a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvps         $atmp,$a,$b,$a        \n\t"
+     "blendvps         $btmp,$b,$a,$a        \n\t"
+     "vminss           $tmp,$atmp,$btmp      \n\t"
+     "cmpps.unordered  $btmp,$atmp,$atmp     \n\t"
+     "blendvps         $dst,$tmp,$atmp,$btmp \n\t"
+  %}
+  ins_encode %{
+    int vector_len = Assembler::AVX_128bit;
+    __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
+    __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
+    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct minF_reduction_reg(regF dst, regF a, regF b, regF xmmt, rRegI tmp, rFlagsReg cr) %{
+  predicate(UseAVX > 0 && n->is_reduction());
+  match(Set dst (MinF a b));
+  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
+
+  format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
+  ins_encode %{
+    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
+                    true /*min*/, true /*single*/);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(double a, double b)
+instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
+  predicate(UseAVX > 0 && !n->is_reduction());
+  match(Set dst (MinD a b));
+  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+  format %{
+     "blendvpd         $atmp,$a,$b,$a        \n\t"
+     "blendvpd         $btmp,$b,$a,$a        \n\t"
+     "vminsd           $tmp,$atmp,$btmp      \n\t"
+     "cmppd.unordered  $btmp,$atmp,$atmp     \n\t"
+     "blendvpd         $dst,$tmp,$atmp,$btmp \n\t"
+  %}
+  ins_encode %{
+    int vector_len = Assembler::AVX_128bit;
+    __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+    __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
+    __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
+    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct minD_reduction_reg(regD dst, regD a, regD b, regD xmmt, rRegL tmp, rFlagsReg cr) %{
+  predicate(UseAVX > 0 && n->is_reduction());
+  match(Set dst (MinD a b));
+  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
+
+  format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
+  ins_encode %{
+    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
+                    true /*min*/, false /*single*/);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 // Load Effective Address
 instruct leaP8(rRegP dst, indOffset8 mem)
 %{
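
Aside (illustration, not part of the patch): the pseudo code comment before maxF_reg corresponds to this scalar Java model of the non-reduction blend sequence; maxBlend is a hypothetical name, and vmaxss is modeled by "return the second operand unless the first is strictly greater":

    public class FpMaxBlendModel {
        static float maxBlend(float a, float b) {
            // blendvps selects by the sign bit of its last operand (here b):
            boolean bNeg = (Float.floatToRawIntBits(b) & 0x80000000) != 0;
            float btmp = bNeg ? a : b;                // blendvps btmp,b,a,b
            float atmp = bNeg ? b : a;                // blendvps atmp,a,b,b
            float tmp  = (atmp > btmp) ? atmp : btmp; // vmaxss tmp,atmp,btmp
            // cmpps.unordered(atmp,atmp) is all-ones only when atmp is NaN:
            return Float.isNaN(atmp) ? atmp : tmp;    // final blendvps
        }
        public static void main(String[] args) {
            System.out.println(maxBlend(0.0f, -0.0f));     // 0.0
            System.out.println(maxBlend(-0.0f, 0.0f));     // 0.0
            System.out.println(maxBlend(1.0f, Float.NaN)); // NaN
        }
    }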
--- old/src/hotspot/share/opto/library_call.cpp	2019-03-06 22:35:26.090132245 +0100
+++ new/src/hotspot/share/opto/library_call.cpp	2019-03-06 22:35:25.880132341 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -6609,6 +6609,40 @@
 
 //------------------------------inline_fp_min_max------------------------------
 bool LibraryCallKit::inline_fp_min_max(vmIntrinsics::ID id) {
+/* DISABLED BECAUSE METHOD DATA ISN'T COLLECTED PER CALL-SITE, SEE JDK-8015416.
+
+  // The intrinsic should be used only when the API branches aren't predictable,
+  // the last one performing the most important comparison. The following heuristic
+  // uses the branch statistics to eventually bail out if necessary.
+
+  ciMethodData *md = callee()->method_data();
+
+  if (md != NULL && md->is_mature() && md->invocation_count() > 0) {
+    ciCallProfile cp = caller()->call_profile_at_bci(bci());
+
+    if (((double)cp.count()) / ((double)md->invocation_count()) < 0.8) {
+      // Bail out if the call-site didn't contribute enough to the statistics.
+      return false;
+    }
+
+    uint taken = 0, not_taken = 0;
+
+    for (ciProfileData *p = md->first_data(); md->is_valid(p); p = md->next_data(p)) {
+      if (p->is_BranchData()) {
+        taken = ((ciBranchData*)p)->taken();
+        not_taken = ((ciBranchData*)p)->not_taken();
+      }
+    }
+
+    double balance = (((double)taken) - ((double)not_taken)) / ((double)md->invocation_count());
+    balance = balance < 0 ? -balance : balance;
+    if (balance > 0.2) {
+      // Bail out if the most important branch is predictable enough.
+      return false;
+    }
+  }
+*/
+
   Node *a = NULL;
   Node *b = NULL;
   Node *n = NULL;
--- old/src/hotspot/share/opto/loopTransform.cpp	2019-03-06 22:35:26.576132022 +0100
+++ new/src/hotspot/share/opto/loopTransform.cpp	2019-03-06 22:35:26.367132118 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2039,7 +2039,8 @@
     if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
       // Now test it to see if it fits the standard pattern for a reduction operator.
       int opc = def_node->Opcode();
-      if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
+      if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())
+          || opc == Op_MinD || opc == Op_MinF || opc == Op_MaxD || opc == Op_MaxF) {
         if (!def_node->is_reduction()) { // Not marked yet
           // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
           bool ok = false;
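
Aside (illustration, not part of the patch): the loopTransform change above marks loop-carried MinF/MinD/MaxF/MaxD nodes as reductions even though no vector reduction form exists for them, so the n->is_reduction() predicates in x86_64.ad pick the branchy emit_fp_min_max variant inside loops of this shape (mirroring fReductionTest in the test below; minReduce is a hypothetical name):

    public class MinReductionShape {
        // The loop Phi feeds Math.min and the result feeds the Phi back, so
        // the MinF node gets is_reduction() set and is matched by
        // minF_reduction_reg instead of the blend-based minF_reg.
        static float minReduce(float[] data) {
            float fmin = data[0];
            for (int i = 1; i < data.length; i++)
                fmin = Math.min(fmin, data[i]);
            return fmin;
        }
        public static void main(String[] args) {
            System.out.println(minReduce(new float[] { 3f, 1f, 2f })); // 1.0
        }
    }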
--- old/test/hotspot/jtreg/compiler/intrinsics/math/TestFpMinMaxIntrinsics.java	2019-03-06 22:35:27.043131807 +0100
+++ new/test/hotspot/jtreg/compiler/intrinsics/math/TestFpMinMaxIntrinsics.java	2019-03-06 22:35:26.834131903 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, Arm Limited. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019, Arm Limited. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,20 +27,41 @@
 * @bug 8212043
 * @summary Test compiler intrinsics of floating-point Math.min/max
 *
- * @run main/othervm -Xint compiler.intrinsics.math.TestFpMinMaxIntrinsics
+ * @run main/othervm -Xint compiler.intrinsics.math.TestFpMinMaxIntrinsics sanityTests 1
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions
 *                   -Xcomp -XX:TieredStopAtLevel=1
 *                   -XX:CompileOnly=java/lang/Math
- *                   compiler.intrinsics.math.TestFpMinMaxIntrinsics
+ *                   compiler.intrinsics.math.TestFpMinMaxIntrinsics sanityTests 1
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions
 *                   -Xcomp -XX:-TieredCompilation
 *                   -XX:CompileOnly=java/lang/Math
- *                   compiler.intrinsics.math.TestFpMinMaxIntrinsics
+ *                   compiler.intrinsics.math.TestFpMinMaxIntrinsics sanityTests 1
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:-TieredCompilation -XX:CompileThresholdScaling=0.1
+ *                   -XX:CompileCommand=print,compiler/intrinsics/math/TestFpMinMaxIntrinsics.*Test*
+ *                   compiler.intrinsics.math.TestFpMinMaxIntrinsics sanityTests 10000
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:-TieredCompilation -Xcomp
+ *                   -XX:CompileCommand=print,compiler/intrinsics/math/TestFpMinMaxIntrinsics.*Test*
+ *                   -XX:CompileCommand=compileonly,compiler/intrinsics/math/TestFpMinMaxIntrinsics.*Test*
+ *                   compiler.intrinsics.math.TestFpMinMaxIntrinsics reductionTests 100
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+TieredCompilation
+ *                   -XX:CompileCommand=print,compiler/intrinsics/math/TestFpMinMaxIntrinsics.min*
+ *                   -XX:CompileCommand=dontinline,compiler/intrinsics/math/TestFpMinMaxIntrinsics.min*
+ *                   compiler.intrinsics.math.TestFpMinMaxIntrinsics randomSearchTree 1
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+TieredCompilation
+ *                   -XX:CompileCommand=print,compiler/intrinsics/math/TestFpMinMaxIntrinsics.min*
+ *                   -XX:CompileCommand=dontinline,compiler/intrinsics/math/TestFpMinMaxIntrinsics.min*
+ *                   compiler.intrinsics.math.TestFpMinMaxIntrinsics sortedSearchTree 1
 */
 
 package compiler.intrinsics.math;
 
 import java.util.Arrays;
+import java.util.Random;
+import java.lang.reflect.Method;
 
 public class TestFpMinMaxIntrinsics {
@@ -63,63 +84,220 @@
 
     private static final float[][] f_cases = {
         //     a          b          min        max
         {     fPos,      fPos,      fPos,      fPos },
+        {     fNeg,      fNeg,      fNeg,      fNeg },
         {     fPos,      fNeg,      fNeg,      fPos },
+        {     fNeg,      fPos,      fNeg,      fPos },
+
+        { fPosZero,  fNegZero,  fNegZero,  fPosZero },
+        { fNegZero,  fPosZero,  fNegZero,  fPosZero },
         { fNegZero,  fNegZero,  fNegZero,  fNegZero },
+
+        {     fPos,   fPosInf,      fPos,   fPosInf },
         {     fNeg,   fNegInf,   fNegInf,      fNeg },
+
+        {     fPos,      fNaN,      fNaN,      fNaN },
+        {     fNaN,      fPos,      fNaN,      fNaN },
+        {     fNeg,      fNaN,      fNaN,      fNaN },
+        {     fNaN,      fNeg,      fNaN,      fNaN },
+
+        {  fPosInf,      fNaN,      fNaN,      fNaN },
+        {     fNaN,   fPosInf,      fNaN,      fNaN },
         {  fNegInf,      fNaN,      fNaN,      fNaN },
+        {     fNaN,   fNegInf,      fNaN,      fNaN }
     };
 
     private static final double[][] d_cases = {
         //     a          b          min        max
         {     dPos,      dPos,      dPos,      dPos },
+        {     dNeg,      dNeg,      dNeg,      dNeg },
         {     dPos,      dNeg,      dNeg,      dPos },
+        {     dNeg,      dPos,      dNeg,      dPos },
+
+        { dPosZero,  dNegZero,  dNegZero,  dPosZero },
+        { dNegZero,  dPosZero,  dNegZero,  dPosZero },
         { dNegZero,  dNegZero,  dNegZero,  dNegZero },
+
+        {     dPos,   dPosInf,      dPos,   dPosInf },
         {     dNeg,   dNegInf,   dNegInf,      dNeg },
+
+        {     dPos,      dNaN,      dNaN,      dNaN },
+        {     dNaN,      dPos,      dNaN,      dNaN },
+        {     dNeg,      dNaN,      dNaN,      dNaN },
+        {     dNaN,      dNeg,      dNaN,      dNaN },
+
+        {  dPosInf,      dNaN,      dNaN,      dNaN },
+        {     dNaN,   dPosInf,      dNaN,      dNaN },
         {  dNegInf,      dNaN,      dNaN,      dNaN },
+        {     dNaN,   dNegInf,      dNaN,      dNaN }
     };
 
     private static void fTest(float[] row) {
-        float min = Math.min(row[0], row[1]);
-        float max = Math.max(row[0], row[1]);
-        if (Float.isNaN(min) && Float.isNaN(max)
-                && Float.isNaN(row[2]) && Float.isNaN(row[3])) {
-            // Return if all of them are NaN
-            return;
-        }
-        if (min != row[2] || max != row[3]) {
+        fCheck(row[0], row[1], Math.min(row[0], row[1]), Math.max(row[0], row[1]), row[2], row[3]);
+    }
+
+    private static void fReductionTest(float[] row) {
+        float fmin = row[0], fmax = row[0];
+
+        for (int i=0; i<100; i++) {
+            fmin = Math.min(fmin, row[1]);
+            fmax = Math.max(fmax, row[1]);
+        }
+
+        fCheck(row[0], row[1], fmin, fmax, row[2], row[3]);
+    }
+
+    private static void fCheck(float a, float b, float fmin, float fmax, float efmin, float efmax) {
+        int min = Float.floatToRawIntBits(fmin);
+        int max = Float.floatToRawIntBits(fmax);
+        int emin = Float.floatToRawIntBits(efmin);
+        int emax = Float.floatToRawIntBits(efmax);
+
+        if (min != emin || max != emax) {
             throw new AssertionError("Unexpected result of float min/max: " +
-                    "a = " + row[0] + ", b = " + row[1] + ", " +
-                    "result = (" + min + ", " + max + "), " +
-                    "expected = (" + row[2] + ", " + row[3] + ")");
+                    "a = " + a + ", b = " + b + ", " +
+                    "result = (" + fmin + ", " + fmax + "), " +
+                    "expected = (" + efmin + ", " + efmax + ")");
         }
     }
 
     private static void dTest(double[] row) {
-        double min = Math.min(row[0], row[1]);
-        double max = Math.max(row[0], row[1]);
-        if (Double.isNaN(min) && Double.isNaN(max)
-                && Double.isNaN(row[2]) && Double.isNaN(row[3])) {
-            // Return if all of them are NaN
-            return;
-        }
-        if (min != row[2] || max != row[3]) {
-            throw new AssertionError("Unexpected result of double min/max" +
-                    "a = " + row[0] + ", b = " + row[1] + ", " +
-                    "result = (" + min + ", " + max + "), " +
-                    "expected = (" + row[2] + ", " + row[3] + ")");
-        }
+        dCheck(row[0], row[1], Math.min(row[0], row[1]), Math.max(row[0], row[1]), row[2], row[3]);
+    }
+
+    private static void dReductionTest(double[] row) {
+        double dmin = row[0], dmax = row[0];
+
+        for (int i=0; i<100; i++) {
+            dmin = Math.min(dmin, row[1]);
+            dmax = Math.max(dmax, row[1]);
+        }
+
+        dCheck(row[0], row[1], dmin, dmax, row[2], row[3]);
+    }
+
+    private static void dCheck(double a, double b, double dmin, double dmax, double edmin, double edmax) {
+        long min = Double.doubleToRawLongBits(dmin);
+        long max = Double.doubleToRawLongBits(dmax);
+        long emin = Double.doubleToRawLongBits(edmin);
+        long emax = Double.doubleToRawLongBits(edmax);
+
+        if (min != emin || max != emax) {
+            throw new AssertionError("Unexpected result of double min/max: " +
+                    "a = " + a + ", b = " + b + ", " +
+                    "result = (" + dmin + ", " + dmax + "), " +
+                    "expected = (" + edmin + ", " + edmax + ")");
+        }
+    }
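
Aside (illustration, not part of the patch): fCheck/dCheck above compare raw bit patterns rather than values because a floating-point compare can neither detect a wrong-signed zero (0.0f == -0.0f is true) nor accept a correct NaN result (NaN != NaN). For example:

    public class BitwiseCheckDemo {
        public static void main(String[] args) {
            int expected = Float.floatToRawIntBits(-0.0f);
            int actual   = Float.floatToRawIntBits(Math.min(0.0f, -0.0f));
            // A plain value compare would pass even if min returned +0.0 here:
            if (actual != expected)
                throw new AssertionError("min(+0.0, -0.0) must be -0.0");
            System.out.println(Float.NaN == Float.NaN); // false
        }
    }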
 
-    public static void main(String[] args) {
+    public static void sanityTests() {
         Arrays.stream(f_cases).forEach(TestFpMinMaxIntrinsics::fTest);
         Arrays.stream(d_cases).forEach(TestFpMinMaxIntrinsics::dTest);
-        System.out.println("PASS");
     }
-}
 
+    public static void reductionTests() {
+        Arrays.stream(f_cases).forEach(TestFpMinMaxIntrinsics::fReductionTest);
+        Arrays.stream(d_cases).forEach(TestFpMinMaxIntrinsics::dReductionTest);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Method m = TestFpMinMaxIntrinsics.class.getDeclaredMethod(args[0]);
+        for (int i = 0 ; i < Integer.parseInt(args[1]) ; i++)
+            m.invoke(null);
+    }
+
+    private static final int COUNT = 1000;
+    private static final int LOOPS = 100;
+
+    private static Random r = new Random();
+
+    private static Node[] pool = new Node[COUNT];
+
+    private static long time = 0;
+    private static long times = 0;
+
+    public static void init() {
+        for (int i=0; i 0 ? c2 - s2 : COUNT - (s2 = step());
+    }
+
+    @Benchmark
+    public float fMinReduce() {
+        float result = Float.MAX_VALUE;
+
+        for (int i=0; i