# HG changeset patch
# User enevill
# Date 1443602139 14400
#      Wed Sep 30 04:35:39 2015 -0400
# Node ID 6de885b4830f624e44631f50c02783a4b93aa910
# Parent  fa430fa4f5774f1c464bf8b8171fc058a6b5ca0f
8138583: aarch64: add support for vectorizing fabs/fneg
Reviewed-by: aph

diff --git a/src/cpu/aarch64/vm/aarch64.ad b/src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad
+++ b/src/cpu/aarch64/vm/aarch64.ad
@@ -15223,6 +15223,88 @@
   ins_pipe(pipe_class_default);
 %}
 
+// --------------------------------- ABS --------------------------------------
+
+instruct vabs2F(vecD dst, vecD src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AbsVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs $dst,$src\t# vector (2S)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vabs4F(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AbsVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs $dst,$src\t# vector (4S)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vabs2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AbsVD src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs $dst,$src\t# vector (2D)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// --------------------------------- NEG --------------------------------------
+
+instruct vneg2F(vecD dst, vecD src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (NegVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg $dst,$src\t# vector (2S)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vneg4F(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (NegVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg $dst,$src\t# vector (4S)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vneg2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (NegVD src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg $dst,$src\t# vector (2D)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // --------------------------------- AND --------------------------------------
 
 instruct vand8B(vecD dst, vecD src1, vecD src2)
diff --git a/src/cpu/aarch64/vm/assembler_aarch64.hpp b/src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -2313,6 +2313,8 @@
 
 #define ASSERTION (T == T2S || T == T4S || T == T2D)
   INSN(fsqrt, 1, 0b11111);
+  INSN(fabs, 0, 0b01111);
+  INSN(fneg, 1, 0b01111);
 #undef ASSERTION
 
 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
diff --git a/src/share/vm/adlc/formssel.cpp b/src/share/vm/adlc/formssel.cpp
--- a/src/share/vm/adlc/formssel.cpp
+++ b/src/share/vm/adlc/formssel.cpp
@@ -4143,6 +4143,8 @@
     "SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
     "MulVS","MulVI","MulVL","MulVF","MulVD",
     "DivVF","DivVD",
+    "AbsVF","AbsVD",
+    "NegVF","NegVD",
     "SqrtVD",
     "AndV" ,"XorV" ,"OrV",
     "AddReductionVI", "AddReductionVL",
diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp
--- a/src/share/vm/opto/classes.hpp
+++ b/src/share/vm/opto/classes.hpp
@@ -290,6 +290,10 @@
 macro(MulReductionVD)
 macro(DivVF)
 macro(DivVD)
+macro(AbsVF)
+macro(AbsVD)
+macro(NegVF)
+macro(NegVD)
 macro(SqrtVD)
 macro(LShiftCntV)
 macro(RShiftCntV)
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
--- a/src/share/vm/opto/superword.cpp
+++ b/src/share/vm/opto/superword.cpp
@@ -1858,8 +1858,8 @@
           vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
           vlen_in_bytes = vn->as_Vector()->length_in_bytes();
         }
-      } else if (opc == Op_SqrtD) {
-        // Promote operand to vector (Sqrt is a 2 address instruction)
+      } else if (opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || opc == Op_NegF || opc == Op_NegD) {
+        // Promote operand to vector (Sqrt/Abs/Neg are 2 address instructions)
         Node* in = vector_opd(p, 1);
         vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
         vlen_in_bytes = vn->as_Vector()->length_in_bytes();
diff --git a/src/share/vm/opto/vectornode.cpp b/src/share/vm/opto/vectornode.cpp
--- a/src/share/vm/opto/vectornode.cpp
+++ b/src/share/vm/opto/vectornode.cpp
@@ -92,6 +92,18 @@
   case Op_DivD:
     assert(bt == T_DOUBLE, "must be");
     return Op_DivVD;
+  case Op_AbsF:
+    assert(bt == T_FLOAT, "must be");
+    return Op_AbsVF;
+  case Op_AbsD:
+    assert(bt == T_DOUBLE, "must be");
+    return Op_AbsVD;
+  case Op_NegF:
+    assert(bt == T_FLOAT, "must be");
+    return Op_NegVF;
+  case Op_NegD:
+    assert(bt == T_DOUBLE, "must be");
+    return Op_NegVD;
   case Op_SqrtD:
     assert(bt == T_DOUBLE, "must be");
     return Op_SqrtVD;
@@ -280,6 +292,12 @@
   case Op_DivVF: return new DivVFNode(n1, n2, vt);
   case Op_DivVD: return new DivVDNode(n1, n2, vt);
 
+  case Op_AbsVF: return new AbsVFNode(n1, vt);
+  case Op_AbsVD: return new AbsVDNode(n1, vt);
+
+  case Op_NegVF: return new NegVFNode(n1, vt);
+  case Op_NegVD: return new NegVDNode(n1, vt);
+
   // Currently only supports double precision sqrt
   case Op_SqrtVD: return new SqrtVDNode(n1, vt);
 
diff --git a/src/share/vm/opto/vectornode.hpp b/src/share/vm/opto/vectornode.hpp
--- a/src/share/vm/opto/vectornode.hpp
+++ b/src/share/vm/opto/vectornode.hpp
@@ -309,6 +309,38 @@
   virtual int Opcode() const;
 };
 
+//------------------------------AbsVFNode--------------------------------------
+// Vector Abs float
+class AbsVFNode : public VectorNode {
+ public:
+  AbsVFNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------AbsVDNode--------------------------------------
+// Vector Abs double
+class AbsVDNode : public VectorNode {
+ public:
+  AbsVDNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------NegVFNode--------------------------------------
+// Vector Neg float
+class NegVFNode : public VectorNode {
+ public:
+  NegVFNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------NegVDNode--------------------------------------
+// Vector Neg double
+class NegVDNode : public VectorNode {
+ public:
+  NegVDNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  virtual int Opcode() const;
+};
+
 //------------------------------SqrtVDNode--------------------------------------
 // Vector Sqrt double
 class SqrtVDNode : public VectorNode {
diff --git a/test/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/compiler/loopopts/superword/SumRedAbsNeg_Double.java
new file mode 100644
--- /dev/null
+++ b/test/compiler/loopopts/superword/SumRedAbsNeg_Double.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8138583
+ * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test
+ * @requires os.arch=="aarch64"
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ */
+
+public class SumRedAbsNeg_Double
+{
+  public static void main(String[] args) throws Exception {
+    double[] a = new double[256*1024];
+    double[] b = new double[256*1024];
+    double[] c = new double[256*1024];
+    double[] d = new double[256*1024];
+    sumReductionInit(a,b,c);
+    double total = 0;
+    double valid = 3.6028590866691944E19;
+
+    for(int j = 0; j < 2000; j++) {
+      total = sumReductionImplement(a,b,c,d,total);
+    }
+
+    if(total == valid) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Invalid sum of elements variable in total: " + total);
+      System.out.println("Expected value = " + valid);
+      throw new Exception("Failed");
+    }
+  }
+
+  public static void sumReductionInit(
+    double[] a,
+    double[] b,
+    double[] c)
+  {
+    for(int j = 0; j < 1; j++)
+    {
+      for(int i = 0; i < a.length; i++)
+      {
+        a[i] = i * 1 + j;
+        b[i] = i * 1 - j;
+        c[i] = i + j;
+      }
+    }
+  }
+
+  public static double sumReductionImplement(
+    double[] a,
+    double[] b,
+    double[] c,
+    double[] d,
+    double total)
+  {
+    for(int i = 0; i < a.length; i++)
+    {
+      d[i] = Math.abs(-a[i] * -b[i]) + Math.abs(-a[i] * -c[i]) + Math.abs(-b[i] * -c[i]);
+      total += d[i];
+    }
+    return total;
+  }
+
+}
diff --git a/test/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/compiler/loopopts/superword/SumRedAbsNeg_Float.java
new file mode 100644
--- /dev/null
+++ b/test/compiler/loopopts/superword/SumRedAbsNeg_Float.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8138583
+ * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test
+ * @requires os.arch=="aarch64"
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ */
+
+public class SumRedAbsNeg_Float
+{
+  public static void main(String[] args) throws Exception {
+    float[] a = new float[256*1024];
+    float[] b = new float[256*1024];
+    float[] c = new float[256*1024];
+    float[] d = new float[256*1024];
+    sumReductionInit(a,b,c);
+    float total = 0;
+    float valid = (float)4.611686E18;
+
+    for(int j = 0; j < 2000; j++) {
+      total = sumReductionImplement(a,b,c,d,total);
+    }
+
+    if(total == valid) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Invalid sum of elements variable in total: " + total);
+      System.out.println("Expected value = " + valid);
+      throw new Exception("Failed");
+    }
+  }
+
+  public static void sumReductionInit(
+    float[] a,
+    float[] b,
+    float[] c)
+  {
+    for(int j = 0; j < 1; j++)
+    {
+      for(int i = 0; i < a.length; i++)
+      {
+        a[i] = i * 1 + j;
+        b[i] = i * 1 - j;
+        c[i] = i + j;
+      }
+    }
+  }
+
+  public static float sumReductionImplement(
+    float[] a,
+    float[] b,
+    float[] c,
+    float[] d,
+    float total)
+  {
+    for(int i = 0; i < a.length; i++)
+    {
+      d[i] = Math.abs(-a[i] * -b[i]) + Math.abs(-a[i] * -c[i]) + Math.abs(-b[i] * -c[i]);
+      total += d[i];
+    }
+    return total;
+  }
+
+}