--- old/src/cpu/sparc/vm/sparc.ad 2016-06-28 11:59:40.589192836 +0200 +++ new/src/cpu/sparc/vm/sparc.ad 2016-06-28 11:59:40.493192841 +0200 @@ -720,7 +720,7 @@ return offset; } -static inline jdouble replicate_immI(int con, int count, int width) { +static inline jlong replicate_immI(int con, int count, int width) { // Load a constant replicated "count" times with width "width" assert(count*width == 8 && width <= 4, "sanity"); int bit_width = width * 8; @@ -729,17 +729,15 @@ for (int i = 0; i < count - 1; i++) { val |= (val << bit_width); } - jdouble dval = *((jdouble*) &val); // coerce to double type - return dval; + return val; } -static inline jdouble replicate_immF(float con) { +static inline jlong replicate_immF(float con) { // Replicate float con 2 times and pack into vector. int val = *((int*)&con); jlong lval = val; lval = (lval << 32) | (lval & 0xFFFFFFFFl); - jdouble dval = *((jdouble*) &lval); // coerce to double type - return dval; + return lval; } // Standard Sparc opcode form2 field breakdown --- old/src/cpu/x86/vm/x86.ad 2016-06-28 11:59:41.273192804 +0200 +++ new/src/cpu/x86/vm/x86.ad 2016-06-28 11:59:41.133192811 +0200 @@ -2131,7 +2131,7 @@ return size+offset_size; } -static inline jfloat replicate4_imm(int con, int width) { +static inline jint replicate4_imm(int con, int width) { // Load a constant of "width" (in bytes) and replicate it to fill 32bit. assert(width == 1 || width == 2, "only byte or short types here"); int bit_width = width * 8; @@ -2141,11 +2141,10 @@ val |= (val << bit_width); bit_width <<= 1; } - jfloat fval = *((jfloat*) &val); // coerce to float type - return fval; + return val; } -static inline jdouble replicate8_imm(int con, int width) { +static inline jlong replicate8_imm(int con, int width) { // Load a constant of "width" (in bytes) and replicate it to fill 64bit. assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); int bit_width = width * 8; @@ -2155,8 +2154,7 @@ val |= (val << bit_width); bit_width <<= 1; } - jdouble dval = *((jdouble*) &val); // coerce to double type - return dval; + return val; } #ifndef PRODUCT --- old/src/share/vm/asm/assembler.hpp 2016-06-28 11:59:41.949192773 +0200 +++ new/src/share/vm/asm/assembler.hpp 2016-06-28 11:59:41.841192778 +0200 @@ -337,6 +337,15 @@ // // We must remember the code section (insts or stubs) in c1 // so we can reset to the proper section in end_a_const(). + address int_constant(jint c) { + CodeSection* c1 = _code_section; + address ptr = start_a_const(sizeof(c), sizeof(c)); + if (ptr != NULL) { + emit_int32(c); + end_a_const(c1); + } + return ptr; + } address long_constant(jlong c) { CodeSection* c1 = _code_section; address ptr = start_a_const(sizeof(c), sizeof(c)); --- old/src/share/vm/opto/compile.cpp 2016-06-28 11:59:42.597192743 +0200 +++ new/src/share/vm/opto/compile.cpp 2016-06-28 11:59:42.477192748 +0200 @@ -3814,6 +3814,7 @@ if (can_be_reused() != other.can_be_reused()) return false; // For floating point values we compare the bit pattern. switch (type()) { + case T_INT: case T_FLOAT: return (_v._value.i == other._v._value.i); case T_LONG: case T_DOUBLE: return (_v._value.j == other._v._value.j); @@ -3828,6 +3829,7 @@ static int type_to_size_in_bytes(BasicType t) { switch (t) { + case T_INT: return sizeof(jint ); case T_LONG: return sizeof(jlong ); case T_FLOAT: return sizeof(jfloat ); case T_DOUBLE: return sizeof(jdouble); @@ -3896,6 +3898,7 @@ Constant con = _constants.at(i); address constant_addr = NULL; switch (con.type()) { + case T_INT: constant_addr = _masm.int_constant( con.get_jint() ); break; case T_LONG: constant_addr = _masm.long_constant( con.get_jlong() ); break; case T_FLOAT: constant_addr = _masm.float_constant( con.get_jfloat() ); break; case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break; --- old/src/share/vm/opto/compile.hpp 2016-06-28 11:59:43.245192712 +0200 +++ new/src/share/vm/opto/compile.hpp 2016-06-28 11:59:43.133192718 +0200 @@ -264,6 +264,7 @@ BasicType type() const { return _type; } + jint get_jint() const { return _v._value.i; } jlong get_jlong() const { return _v._value.j; } jfloat get_jfloat() const { return _v._value.f; } jdouble get_jdouble() const { return _v._value.d; } @@ -320,6 +321,14 @@ Constant add(MachConstantNode* n, BasicType type, jvalue value); Constant add(Metadata* metadata); Constant add(MachConstantNode* n, MachOper* oper); + Constant add(MachConstantNode* n, jint i) { + jvalue value; value.i = i; + return add(n, T_INT, value); + } + Constant add(MachConstantNode* n, jlong j) { + jvalue value; value.j = j; + return add(n, T_LONG, value); + } Constant add(MachConstantNode* n, jfloat f) { jvalue value; value.f = f; return add(n, T_FLOAT, value); --- /dev/null 2016-06-28 08:01:07.973320956 +0200 +++ new/test/compiler/vectorization/TestNaNVector.java 2016-06-28 11:59:43.769192688 +0200 @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8160425 + * @summary Test vectorization with a signalling NaN. + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-OptimizeFill TestNaNVector + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-OptimizeFill -XX:MaxVectorSize=4 TestNaNVector + */ +public class TestNaNVector { + private char[] array; + private static final int LEN = 1024; + + public static void main(String args[]) { + TestNaNVector test = new TestNaNVector(); + // Check double precision NaN + for (int i = 0; i < 10_000; ++i) { + test.vectorizeNaNDP(); + } + System.out.println("Checking double precision Nan"); + test.checkResult(0xfff7); + + // Check single precision NaN + for (int i = 0; i < 10_000; ++i) { + test.vectorizeNaNSP(); + } + System.out.println("Checking single precision Nan"); + test.checkResult(0xff80); + } + + public TestNaNVector() { + array = new char[LEN]; + } + + public void vectorizeNaNDP() { + // This loop will be vectorized and the array store will be replaced by + // a 64-bit vector store to four subsequent array elements. The vector + // should look like this '0xffa0ffa0ffa0ffa0' and is read from the constant + // table. However, in floating point arithmetic this is a signalling NaN + // which may be converted to a quiet NaN when processed by the x87 FPU. + // If the signalling bit is set, the vector ends up in the constant table + // as '0xffa0ffa0ffa0ffa0' which leads to an incorrect result. + for (int i = 0; i < LEN; ++i) { + array[i] = 0xfff7; + } + } + + public void vectorizeNaNSP() { + // Same as above but with single precision + for (int i = 0; i < LEN; ++i) { + array[i] = 0xff80; + } + } + + public void checkResult(int expected) { + for (int i = 0; i < LEN; ++i) { + if (array[i] != expected) { + throw new RuntimeException("Invalid result: array[" + i + "] = " + (int)array[i] + " != " + expected); + } + } + } +} +