# HG changeset patch
# User enevill
# Date 1443602139 14400
#      Wed Sep 30 04:35:39 2015 -0400
# Node ID 6de885b4830f624e44631f50c02783a4b93aa910
# Parent  fa430fa4f5774f1c464bf8b8171fc058a6b5ca0f
8138583: aarch64: add support for vectorizing fabs/fneg
Reviewed-by: aph

diff --git a/src/cpu/aarch64/vm/aarch64.ad b/src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad
+++ b/src/cpu/aarch64/vm/aarch64.ad
@@ -15223,6 +15223,88 @@
   ins_pipe(pipe_class_default);
 %}
 
+// --------------------------------- ABS --------------------------------------
+
+instruct vabs2F(vecD dst, vecD src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AbsVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs  $dst,$src\t# vector (2S)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vabs4F(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AbsVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs  $dst,$src\t# vector (4S)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vabs2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AbsVD src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs  $dst,$src\t# vector (2D)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// --------------------------------- NEG --------------------------------------
+
+instruct vneg2F(vecD dst, vecD src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (NegVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg  $dst,$src\t# vector (2S)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vneg4F(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (NegVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg  $dst,$src\t# vector (4S)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vneg2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (NegVD src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg  $dst,$src\t# vector (2D)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // --------------------------------- AND --------------------------------------
 
 instruct vand8B(vecD dst, vecD src1, vecD src2)
diff --git a/src/cpu/aarch64/vm/assembler_aarch64.hpp b/src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -2313,6 +2313,8 @@
 
 #define ASSERTION (T == T2S || T == T4S || T == T2D)
   INSN(fsqrt, 1, 0b11111);
+  INSN(fabs,  0, 0b01111);
+  INSN(fneg,  1, 0b01111);
 #undef ASSERTION
 
 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
diff --git a/src/share/vm/adlc/formssel.cpp b/src/share/vm/adlc/formssel.cpp
--- a/src/share/vm/adlc/formssel.cpp
+++ b/src/share/vm/adlc/formssel.cpp
@@ -4143,6 +4143,8 @@
     "SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
     "MulVS","MulVI","MulVL","MulVF","MulVD",
     "DivVF","DivVD",
+    "AbsVF","AbsVD",
+    "NegVF","NegVD",
     "SqrtVD",
     "AndV" ,"XorV" ,"OrV",
     "AddReductionVI", "AddReductionVL",
diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp
--- a/src/share/vm/opto/classes.hpp
+++ b/src/share/vm/opto/classes.hpp
@@ -290,6 +290,10 @@
 macro(MulReductionVD)
 macro(DivVF)
 macro(DivVD)
+macro(AbsVF)
+macro(AbsVD)
+macro(NegVF)
+macro(NegVD)
 macro(SqrtVD)
 macro(LShiftCntV)
 macro(RShiftCntV)
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
--- a/src/share/vm/opto/superword.cpp
+++ b/src/share/vm/opto/superword.cpp
@@ -1858,8 +1858,8 @@
           vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
           vlen_in_bytes = vn->as_Vector()->length_in_bytes();
         }
-      } else if (opc == Op_SqrtD) {
-        // Promote operand to vector (Sqrt is a 2 address instruction)
+      } else if (opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || opc == Op_NegF || opc == Op_NegD) {
+        // Promote operand to vector (Sqrt/Abs/Neg are 2 address instructions)
         Node* in = vector_opd(p, 1);
         vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
         vlen_in_bytes = vn->as_Vector()->length_in_bytes();
diff --git a/src/share/vm/opto/vectornode.cpp b/src/share/vm/opto/vectornode.cpp
--- a/src/share/vm/opto/vectornode.cpp
+++ b/src/share/vm/opto/vectornode.cpp
@@ -92,6 +92,18 @@
   case Op_DivD:
     assert(bt == T_DOUBLE, "must be");
     return Op_DivVD;
+  case Op_AbsF:
+    assert(bt == T_FLOAT, "must be");
+    return Op_AbsVF;
+  case Op_AbsD:
+    assert(bt == T_DOUBLE, "must be");
+    return Op_AbsVD;
+  case Op_NegF:
+    assert(bt == T_FLOAT, "must be");
+    return Op_NegVF;
+  case Op_NegD:
+    assert(bt == T_DOUBLE, "must be");
+    return Op_NegVD;
   case Op_SqrtD:
     assert(bt == T_DOUBLE, "must be");
     return Op_SqrtVD;
@@ -280,6 +292,12 @@
   case Op_DivVF: return new DivVFNode(n1, n2, vt);
   case Op_DivVD: return new DivVDNode(n1, n2, vt);
 
+  case Op_AbsVF: return new AbsVFNode(n1, vt);
+  case Op_AbsVD: return new AbsVDNode(n1, vt);
+
+  case Op_NegVF: return new NegVFNode(n1, vt);
+  case Op_NegVD: return new NegVDNode(n1, vt);
+
   // Currently only supports double precision sqrt
   case Op_SqrtVD: return new SqrtVDNode(n1, vt);
 
diff --git a/src/share/vm/opto/vectornode.hpp b/src/share/vm/opto/vectornode.hpp
--- a/src/share/vm/opto/vectornode.hpp
+++ b/src/share/vm/opto/vectornode.hpp
@@ -309,6 +309,38 @@
   virtual int Opcode() const;
 };
 
+//------------------------------AbsVFNode--------------------------------------
+// Vector Abs float
+class AbsVFNode : public VectorNode {
+ public:
+  AbsVFNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------AbsVDNode--------------------------------------
+// Vector Abs double
+class AbsVDNode : public VectorNode {
+ public:
+  AbsVDNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------NegVFNode--------------------------------------
+// Vector Neg float
+class NegVFNode : public VectorNode {
+ public:
+  NegVFNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------NegVDNode--------------------------------------
+// Vector Neg double
+class NegVDNode : public VectorNode {
+ public:
+  NegVDNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  virtual int Opcode() const;
+};
+
 //------------------------------SqrtVDNode--------------------------------------
 // Vector Sqrt double
 class SqrtVDNode : public VectorNode {
diff --git a/test/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/compiler/loopopts/superword/SumRedAbsNeg_Double.java
new file mode 100644
--- /dev/null
+++ b/test/compiler/loopopts/superword/SumRedAbsNeg_Double.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8138583
+ * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test
+ * @requires os.arch=="aarch64"
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Double
+ */
+
+public class SumRedAbsNeg_Double
+{
+  public static void main(String[] args) throws Exception {
+    double[] a = new double[256*1024];
+    double[] b = new double[256*1024];
+    double[] c = new double[256*1024];
+    double[] d = new double[256*1024];
+    sumReductionInit(a,b,c);
+    double total = 0;
+    double valid = 3.6028590866691944E19;
+
+    for(int j = 0; j < 2000; j++) {
+      total = sumReductionImplement(a,b,c,d,total);
+    }
+
+    if(total == valid) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Invalid sum of elements variable in total: " + total);
+      System.out.println("Expected value = " + valid);
+      throw new Exception("Failed");
+    }
+  }
+
+  public static void sumReductionInit(
+    double[] a,
+    double[] b,
+    double[] c)
+  {
+    for(int j = 0; j < 1; j++)
+    {
+      for(int i = 0; i < a.length; i++)
+      {
+        a[i] = i * 1 + j;
+        b[i] = i * 1 - j;
+        c[i] = i + j;
+      }
+    }
+  }
+
+  public static double sumReductionImplement(
+    double[] a,
+    double[] b,
+    double[] c,
+    double[] d,
+    double total)
+  {
+    for(int i = 0; i < a.length; i++)
+    {
+      d[i] = Math.abs(-a[i] * -b[i]) + Math.abs(-a[i] * -c[i]) + Math.abs(-b[i] * -c[i]);
+      total += d[i];
+    }
+    return total;
+  }
+
+}
diff --git a/test/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/compiler/loopopts/superword/SumRedAbsNeg_Float.java
new file mode 100644
--- /dev/null
+++ b/test/compiler/loopopts/superword/SumRedAbsNeg_Float.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8138583
+ * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test
+ * @requires os.arch=="aarch64"
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=8 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=16 -XX:CompileThresholdScaling=0.1 SumRedAbsNeg_Float
+ */
+
+public class SumRedAbsNeg_Float
+{
+  public static void main(String[] args) throws Exception {
+    float[] a = new float[256*1024];
+    float[] b = new float[256*1024];
+    float[] c = new float[256*1024];
+    float[] d = new float[256*1024];
+    sumReductionInit(a,b,c);
+    float total = 0;
+    float valid = (float)4.611686E18;
+
+    for(int j = 0; j < 2000; j++) {
+      total = sumReductionImplement(a,b,c,d,total);
+    }
+
+    if(total == valid) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Invalid sum of elements variable in total: " + total);
+      System.out.println("Expected value = " + valid);
+      throw new Exception("Failed");
+    }
+  }
+
+  public static void sumReductionInit(
+    float[] a,
+    float[] b,
+    float[] c)
+  {
+    for(int j = 0; j < 1; j++)
+    {
+      for(int i = 0; i < a.length; i++)
+      {
+        a[i] = i * 1 + j;
+        b[i] = i * 1 - j;
+        c[i] = i + j;
+      }
+    }
+  }
+
+  public static float sumReductionImplement(
+    float[] a,
+    float[] b,
+    float[] c,
+    float[] d,
+    float total)
+  {
+    for(int i = 0; i < a.length; i++)
+    {
+      d[i] = Math.abs(-a[i] * -b[i]) + Math.abs(-a[i] * -c[i]) + Math.abs(-b[i] * -c[i]);
+      total += d[i];
+    }
+    return total;
+  }
+
+}