--- old/src/share/vm/interpreter/interpreter.cpp 2015-08-07 15:41:38.130585012 +0200 +++ new/src/share/vm/interpreter/interpreter.cpp 2015-08-07 15:41:37.990585019 +0200 @@ -234,7 +234,15 @@ case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer; } } -#endif + + switch(m->intrinsic_id()) { + case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat; + case vmIntrinsics::_floatToRawIntBits: return java_lang_Float_floatToRawIntBits; + case vmIntrinsics::_longBitsToDouble: return java_lang_Double_longBitsToDouble; + case vmIntrinsics::_doubleToRawLongBits: return java_lang_Double_doubleToRawLongBits; + } + +#endif // CC_INTERP // Native method? // Note: This test must come _before_ the test for intrinsic @@ -559,6 +567,25 @@ : // fall thru case Interpreter::java_util_zip_CRC32_updateByteBuffer : entry_point = generate_CRC32_updateBytes_entry(kind); break; +#if defined(TARGET_ARCH_x86) && !defined(_LP64) + // On x86_32 platforms, a special entry is generated for the following four methods. + // On other platforms the normal entry is used to enter these methods. 
+ case Interpreter::java_lang_Float_intBitsToFloat + : entry_point = generate_Float_intBitsToFloat_entry(); break; + case Interpreter::java_lang_Float_floatToRawIntBits + : entry_point = generate_Float_floatToRawIntBits_entry(); break; + case Interpreter::java_lang_Double_longBitsToDouble + : entry_point = generate_Double_longBitsToDouble_entry(); break; + case Interpreter::java_lang_Double_doubleToRawLongBits + : entry_point = generate_Double_doubleToRawLongBits_entry(); break; +#else + case Interpreter::java_lang_Float_intBitsToFloat: + case Interpreter::java_lang_Float_floatToRawIntBits: + case Interpreter::java_lang_Double_longBitsToDouble: + case Interpreter::java_lang_Double_doubleToRawLongBits: + entry_point = generate_native_entry(false); + break; +#endif // defined(TARGET_ARCH_x86) && !defined(_LP64) #endif // CC_INTERP default: fatal(err_msg("unexpected method kind: %d", kind)); --- old/src/cpu/x86/vm/templateTable_x86.cpp 2015-08-07 15:41:38.170585010 +0200 +++ new/src/cpu/x86/vm/templateTable_x86.cpp 2015-08-07 15:41:37.966585020 +0200 @@ -349,53 +349,60 @@ void TemplateTable::fconst(int value) { transition(vtos, ftos); + if (UseSSE >= 1) { + static float one = 1.0f, two = 2.0f; + switch (value) { + case 0: + __ xorps(xmm0, xmm0); + break; + case 1: + __ movflt(xmm0, ExternalAddress((address) &one)); + break; + case 2: + __ movflt(xmm0, ExternalAddress((address) &two)); + break; + default: + ShouldNotReachHere(); + break; + } + } else { #ifdef _LP64 - static float one = 1.0f, two = 2.0f; - switch (value) { - case 0: - __ xorps(xmm0, xmm0); - break; - case 1: - __ movflt(xmm0, ExternalAddress((address) &one)); - break; - case 2: - __ movflt(xmm0, ExternalAddress((address) &two)); - break; - default: ShouldNotReachHere(); - break; - } #else - if (value == 0) { __ fldz(); - } else if (value == 1) { __ fld1(); - } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here - } else { ShouldNotReachHere(); + if (value == 0) { 
__ fldz(); + } else if (value == 1) { __ fld1(); + } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here + } else { ShouldNotReachHere(); + } +#endif // _LP64 } -#endif } void TemplateTable::dconst(int value) { transition(vtos, dtos); + if (UseSSE >= 2) { + static double one = 1.0; + switch (value) { + case 0: + __ xorpd(xmm0, xmm0); + break; + case 1: + __ movdbl(xmm0, ExternalAddress((address) &one)); + break; + default: + ShouldNotReachHere(); + break; + } + } else { #ifdef _LP64 - static double one = 1.0; - switch (value) { - case 0: - __ xorpd(xmm0, xmm0); - break; - case 1: - __ movdbl(xmm0, ExternalAddress((address) &one)); - break; - default: ShouldNotReachHere(); - break; - } - #else - if (value == 0) { __ fldz(); - } else if (value == 1) { __ fld1(); - } else { ShouldNotReachHere(); - } + if (value == 0) { __ fldz(); + } else if (value == 1) { __ fld1(); + } else { ShouldNotReachHere(); + } #endif + } } void TemplateTable::bipush() { @@ -454,8 +461,7 @@ __ jccb(Assembler::notEqual, notFloat); // ftos - LP64_ONLY(__ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset))); - NOT_LP64(__ fld_s( Address(rcx, rbx, Address::times_ptr, base_offset))); + __ load_float(Address(rcx, rbx, Address::times_ptr, base_offset)); __ push(ftos); __ jmp(Done); @@ -522,8 +528,7 @@ __ jccb(Assembler::notEqual, Long); // dtos - LP64_ONLY(__ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset))); - NOT_LP64(__ fld_d( Address(rcx, rbx, Address::times_ptr, base_offset))); + __ load_double(Address(rcx, rbx, Address::times_ptr, base_offset)); __ push(dtos); __ jmpb(Done); @@ -617,15 +622,13 @@ void TemplateTable::fload() { transition(vtos, ftos); locals_index(rbx); - LP64_ONLY(__ movflt(xmm0, faddress(rbx))); - NOT_LP64(__ fld_s(faddress(rbx))); + __ load_float(faddress(rbx)); } void TemplateTable::dload() { transition(vtos, dtos); locals_index(rbx); - LP64_ONLY(__ movdbl(xmm0, daddress(rbx))); - NOT_LP64(__ 
fld_d(daddress(rbx))); + __ load_double(daddress(rbx)); } void TemplateTable::aload() { @@ -657,15 +660,13 @@ void TemplateTable::wide_fload() { transition(vtos, ftos); locals_index_wide(rbx); - LP64_ONLY(__ movflt(xmm0, faddress(rbx))); - NOT_LP64(__ fld_s(faddress(rbx))); + __ load_float(faddress(rbx)); } void TemplateTable::wide_dload() { transition(vtos, dtos); locals_index_wide(rbx); - LP64_ONLY(__ movdbl(xmm0, daddress(rbx))); - NOT_LP64(__ fld_d(daddress(rbx))); + __ load_double(daddress(rbx)); } void TemplateTable::wide_aload() { @@ -726,10 +727,9 @@ // rax: index // rdx: array index_check(rdx, rax); // kills rbx - LP64_ONLY(__ movflt(xmm0, Address(rdx, rax, - Address::times_4, - arrayOopDesc::base_offset_in_bytes(T_FLOAT)))); - NOT_LP64(__ fld_s(Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)))); + __ load_float(Address(rdx, rax, + Address::times_4, + arrayOopDesc::base_offset_in_bytes(T_FLOAT))); } void TemplateTable::daload() { @@ -737,10 +737,9 @@ // rax: index // rdx: array index_check(rdx, rax); // kills rbx - LP64_ONLY(__ movdbl(xmm0, Address(rdx, rax, - Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_DOUBLE)))); - NOT_LP64(__ fld_d(Address(rdx, rax, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)))); + __ load_double(Address(rdx, rax, + Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_DOUBLE))); } void TemplateTable::aaload() { @@ -807,14 +806,12 @@ void TemplateTable::fload(int n) { transition(vtos, ftos); - LP64_ONLY(__ movflt(xmm0, faddress(n))); - NOT_LP64(__ fld_s(faddress(n))); + __ load_float(faddress(n)); } void TemplateTable::dload(int n) { transition(vtos, dtos); - LP64_ONLY(__ movdbl(xmm0, daddress(n))); - NOT_LP64(__ fld_d(daddress(n))); + __ load_double(daddress(n)); } void TemplateTable::aload(int n) { @@ -919,15 +916,13 @@ void TemplateTable::fstore() { transition(ftos, vtos); locals_index(rbx); - LP64_ONLY(__ movflt(faddress(rbx), xmm0)); - NOT_LP64(__ 
fstp_s(faddress(rbx))); + __ store_float(faddress(rbx)); } void TemplateTable::dstore() { transition(dtos, vtos); locals_index(rbx); - LP64_ONLY(__ movdbl(daddress(rbx), xmm0)); - NOT_LP64(__ fstp_d(daddress(rbx))); + __ store_double(daddress(rbx)); } void TemplateTable::astore() { @@ -956,7 +951,7 @@ void TemplateTable::wide_fstore() { #ifdef _LP64 transition(vtos, vtos); - __ pop_f(); + __ pop_f(xmm0); locals_index_wide(rbx); __ movflt(faddress(rbx), xmm0); #else @@ -967,7 +962,7 @@ void TemplateTable::wide_dstore() { #ifdef _LP64 transition(vtos, vtos); - __ pop_d(); + __ pop_d(xmm0); locals_index_wide(rbx); __ movdbl(daddress(rbx), xmm0); #else @@ -1011,29 +1006,21 @@ void TemplateTable::fastore() { transition(ftos, vtos); __ pop_i(rbx); - // xmm0: value + // value is in UseSSE >= 1 ? xmm0 : ST(0) // rbx: index // rdx: array index_check(rdx, rbx); // prefer index in rbx - LP64_ONLY(__ movflt(Address(rdx, rbx, - Address::times_4, - arrayOopDesc::base_offset_in_bytes(T_FLOAT)), - xmm0)); - NOT_LP64(__ fstp_s(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)))); + __ store_float(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT))); } void TemplateTable::dastore() { transition(dtos, vtos); __ pop_i(rbx); - // xmm0: value + // value is in UseSSE >= 2 ? 
xmm0 : ST(0) // rbx: index // rdx: array index_check(rdx, rbx); // prefer index in rbx - LP64_ONLY(__ movdbl(Address(rdx, rbx, - Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), - xmm0)); - NOT_LP64(__ fstp_d(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)))); + __ store_double(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))); } void TemplateTable::aastore() { @@ -1134,14 +1121,12 @@ void TemplateTable::fstore(int n) { transition(ftos, vtos); - LP64_ONLY(__ movflt(faddress(n), xmm0)); - NOT_LP64(__ fstp_s(faddress(n))); + __ store_float(faddress(n)); } void TemplateTable::dstore(int n) { transition(dtos, vtos); - LP64_ONLY(__ movdbl(daddress(n), xmm0)); - NOT_LP64(__ fstp_d(daddress(n))); + __ store_double(daddress(n)); } @@ -1425,82 +1410,127 @@ void TemplateTable::fop2(Operation op) { transition(ftos, ftos); + + if (UseSSE >= 1) { + switch (op) { + case add: + __ addss(xmm0, at_rsp()); + __ addptr(rsp, Interpreter::stackElementSize); + break; + case sub: + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ subss(xmm0, xmm1); + break; + case mul: + __ mulss(xmm0, at_rsp()); + __ addptr(rsp, Interpreter::stackElementSize); + break; + case div: + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ divss(xmm0, xmm1); + break; + case rem: + // On x86_64 platforms the SharedRuntime::frem method is called to perform the + // modulo operation. The frem method calls the function + // double fmod(double x, double y) in math.h. The documentation of fmod states: + // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN + // (signalling or quiet) is returned. + // + // On x86_32 platforms the FPU is used to perform the modulo operation. The + // reason is that on 32-bit Windows the sign of modulo operations diverges from + // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f and not -0.0f).
+ // The fprem instruction used on x86_32 is functionally equivalent to + // SharedRuntime::frem in that it returns a NaN. +#ifdef _LP64 + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); +#else + __ push_f(xmm0); + __ pop_f(); + __ fld_s(at_rsp()); + __ fremr(rax); + __ f2ieee(); + __ pop(rax); // pop second operand off the stack + __ push_f(); + __ pop_f(xmm0); +#endif + break; + default: + ShouldNotReachHere(); + break; + } + } else { #ifdef _LP64 - switch (op) { - case add: - __ addss(xmm0, at_rsp()); - __ addptr(rsp, Interpreter::stackElementSize); - break; - case sub: - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ subss(xmm0, xmm1); - break; - case mul: - __ mulss(xmm0, at_rsp()); - __ addptr(rsp, Interpreter::stackElementSize); - break; - case div: - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ divss(xmm0, xmm1); - break; - case rem: - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); - break; - default: ShouldNotReachHere(); - break; - } #else - switch (op) { + switch (op) { case add: __ fadd_s (at_rsp()); break; case sub: __ fsubr_s(at_rsp()); break; case mul: __ fmul_s (at_rsp()); break; case div: __ fdivr_s(at_rsp()); break; case rem: __ fld_s (at_rsp()); __ fremr(rax); break; default : ShouldNotReachHere(); + } + __ f2ieee(); + __ pop(rax); // pop second operand off the stack +#endif // _LP64 } - __ f2ieee(); - __ pop(rax); // pop float thing off -#endif } void TemplateTable::dop2(Operation op) { transition(dtos, dtos); + if (UseSSE >= 2) { + switch (op) { + case add: + __ addsd(xmm0, at_rsp()); + __ addptr(rsp, 2 * Interpreter::stackElementSize); + break; + case sub: + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ subsd(xmm0, xmm1); + break; + case mul: + __ mulsd(xmm0, at_rsp()); + __ addptr(rsp, 2 * Interpreter::stackElementSize); + break; + case div: + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ divsd(xmm0, xmm1); + break; + 
case rem: + // Similar to fop2(), the modulo operation is performed using the + // SharedRuntime::drem method (on x86_64 platforms) or using the + // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2(). +#ifdef _LP64 + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); +#else + __ push_d(xmm0); + __ pop_d(); + __ fld_d(at_rsp()); + __ fremr(rax); + __ d2ieee(); + __ pop(rax); + __ pop(rdx); + __ push_d(); + __ pop_d(xmm0); +#endif + break; + default: + ShouldNotReachHere(); + break; + } + } else { #ifdef _LP64 - switch (op) { - case add: - __ addsd(xmm0, at_rsp()); - __ addptr(rsp, 2 * Interpreter::stackElementSize); - break; - case sub: - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ subsd(xmm0, xmm1); - break; - case mul: - __ mulsd(xmm0, at_rsp()); - __ addptr(rsp, 2 * Interpreter::stackElementSize); - break; - case div: - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ divsd(xmm0, xmm1); - break; - case rem: - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); - break; - default: ShouldNotReachHere(); - break; - } #else - switch (op) { + switch (op) { case add: __ fadd_d (at_rsp()); break; case sub: __ fsubr_d(at_rsp()); break; case mul: { @@ -1543,12 +1573,13 @@ } case rem: __ fld_d (at_rsp()); __ fremr(rax); break; default : ShouldNotReachHere(); - } - __ d2ieee(); - // Pop double precision number from rsp. - __ pop(rax); - __ pop(rdx); + } + __ d2ieee(); + // Pop double precision number from rsp. + __ pop(rax); + __ pop(rdx); #endif + } } void TemplateTable::ineg() { @@ -1562,7 +1593,6 @@ NOT_LP64(__ lneg(rdx, rax)); } -#ifdef _LP64 // Note: 'double' and 'long long' have 32-bits alignment on x86. static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { // Use the expression (adr)&(~0xF) to provide 128-bits aligned address @@ -1577,26 +1607,30 @@ // Buffer for 128-bits masks used by SSE instructions. 
static jlong float_signflip_pool[2*2]; static jlong double_signflip_pool[2*2]; -#endif void TemplateTable::fneg() { transition(ftos, ftos); -#ifdef _LP64 - static jlong *float_signflip = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000); - __ xorps(xmm0, ExternalAddress((address) float_signflip)); -#else - __ fchs(); -#endif + if (UseSSE >= 1) { + static jlong *float_signflip = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000); + __ xorps(xmm0, ExternalAddress((address) float_signflip)); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(__ fchs()); + } } void TemplateTable::dneg() { transition(dtos, dtos); + if (UseSSE >= 2) { + static jlong *double_signflip = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000); + __ xorpd(xmm0, ExternalAddress((address) double_signflip)); + } else { #ifdef _LP64 - static jlong *double_signflip = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000); - __ xorpd(xmm0, ExternalAddress((address) double_signflip)); + ShouldNotReachHere(); #else - __ fchs(); + __ fchs(); #endif + } } void TemplateTable::iinc() { @@ -1798,18 +1832,26 @@ __ extend_sign(rdx, rax); break; case Bytecodes::_i2f: - __ push(rax); // store int on tos - __ fild_s(at_rsp()); // load int to ST0 - __ f2ieee(); // truncate to float size - __ pop(rcx); // adjust rsp + if (UseSSE >= 1) { + __ cvtsi2ssl(xmm0, rax); + } else { + __ push(rax); // store int on tos + __ fild_s(at_rsp()); // load int to ST0 + __ f2ieee(); // truncate to float size + __ pop(rcx); // adjust rsp + } break; case Bytecodes::_i2d: + if (UseSSE >= 2) { + __ cvtsi2sdl(xmm0, rax); + } else { __ push(rax); // add one slot for d2ieee() __ push(rax); // store int on tos __ fild_s(at_rsp()); // load int to ST0 __ d2ieee(); // truncate to double size __ pop(rcx); // adjust rsp __ pop(rcx); + } break; case Bytecodes::_i2b: __ shll(rax, 24); // truncate upper 24 bits @@ -1829,50 
+1871,102 @@ /* nothing to do */ break; case Bytecodes::_l2f: + // On 64-bit platforms, the cvtsi2ssq instruction is used to convert + // 64-bit long values to floats. On 32-bit platforms it is not possible + // to use that instruction with 64-bit operands, therefore the FPU is + // used to perform the conversion. __ push(rdx); // store long on tos __ push(rax); __ fild_d(at_rsp()); // load long to ST0 __ f2ieee(); // truncate to float size __ pop(rcx); // adjust rsp __ pop(rcx); + if (UseSSE >= 1) { + __ push_f(); + __ pop_f(xmm0); + } break; case Bytecodes::_l2d: + // On 32-bit platforms the FPU is used for conversion because on + // 32-bit platforms it is not possible to use the cvtsi2sdq + // instruction with 64-bit operands. __ push(rdx); // store long on tos __ push(rax); __ fild_d(at_rsp()); // load long to ST0 __ d2ieee(); // truncate to double size __ pop(rcx); // adjust rsp __ pop(rcx); + if (UseSSE >= 2) { + __ push_d(); + __ pop_d(xmm0); + } break; case Bytecodes::_f2i: - __ push(rcx); // reserve space for argument - __ fstp_s(at_rsp()); // pass float argument on stack + // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs + // as it returns 0 for any NaN. + if (UseSSE >= 1) { + __ push_f(xmm0); + } else { + __ push(rcx); // reserve space for argument + __ fstp_s(at_rsp()); // pass float argument on stack + } __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); break; case Bytecodes::_f2l: - __ push(rcx); // reserve space for argument - __ fstp_s(at_rsp()); // pass float argument on stack + // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs + // as it returns 0 for any NaN.
+ if (UseSSE >= 1) { + __ push_f(xmm0); + } else { + __ push(rcx); // reserve space for argument + __ fstp_s(at_rsp()); // pass float argument on stack + } __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); break; case Bytecodes::_f2d: - /* nothing to do */ + if (UseSSE < 1) { + /* nothing to do */ + } else if (UseSSE == 1) { + __ push_f(xmm0); + __ pop_f(); + } else { // UseSSE >= 2 + __ cvtss2sd(xmm0, xmm0); + } break; case Bytecodes::_d2i: - __ push(rcx); // reserve space for argument - __ push(rcx); - __ fstp_d(at_rsp()); // pass double argument on stack + if (UseSSE >= 2) { + __ push_d(xmm0); + } else { + __ push(rcx); // reserve space for argument + __ push(rcx); + __ fstp_d(at_rsp()); // pass double argument on stack + } __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2); break; case Bytecodes::_d2l: - __ push(rcx); // reserve space for argument - __ push(rcx); - __ fstp_d(at_rsp()); // pass double argument on stack + if (UseSSE >= 2) { + __ push_d(xmm0); + } else { + __ push(rcx); // reserve space for argument + __ push(rcx); + __ fstp_d(at_rsp()); // pass double argument on stack + } __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2); break; case Bytecodes::_d2f: - __ push(rcx); // reserve space for f2ieee() - __ f2ieee(); // truncate to float size - __ pop(rcx); // adjust rsp + if (UseSSE <= 1) { + __ push(rcx); // reserve space for f2ieee() + __ f2ieee(); // truncate to float size + __ pop(rcx); // adjust rsp + if (UseSSE == 1) { + // The cvtsd2ss instruction is not available if UseSSE==1, therefore + // the conversion is performed using the FPU in this case. + __ push_f(); + __ pop_f(xmm0); + } + } else { // UseSSE >= 2 + __ cvtsd2ss(xmm0, xmm0); + } break; default : ShouldNotReachHere(); @@ -1901,42 +1995,47 @@ } void TemplateTable::float_cmp(bool is_float, int unordered_result) { -#ifdef _LP64 - Label done; - if (is_float) { - // XXX get rid of pop here, use ... 
reg, mem32 - __ pop_f(xmm1); - __ ucomiss(xmm1, xmm0); - } else { - // XXX get rid of pop here, use ... reg, mem64 - __ pop_d(xmm1); - __ ucomisd(xmm1, xmm0); - } - if (unordered_result < 0) { - __ movl(rax, -1); - __ jccb(Assembler::parity, done); - __ jccb(Assembler::below, done); - __ setb(Assembler::notEqual, rdx); - __ movzbl(rax, rdx); + if ((is_float && UseSSE >= 1) || + (!is_float && UseSSE >= 2)) { + Label done; + if (is_float) { + // XXX get rid of pop here, use ... reg, mem32 + __ pop_f(xmm1); + __ ucomiss(xmm1, xmm0); + } else { + // XXX get rid of pop here, use ... reg, mem64 + __ pop_d(xmm1); + __ ucomisd(xmm1, xmm0); + } + if (unordered_result < 0) { + __ movl(rax, -1); + __ jccb(Assembler::parity, done); + __ jccb(Assembler::below, done); + __ setb(Assembler::notEqual, rdx); + __ movzbl(rax, rdx); + } else { + __ movl(rax, 1); + __ jccb(Assembler::parity, done); + __ jccb(Assembler::above, done); + __ movl(rax, 0); + __ jccb(Assembler::equal, done); + __ decrementl(rax); + } + __ bind(done); } else { - __ movl(rax, 1); - __ jccb(Assembler::parity, done); - __ jccb(Assembler::above, done); - __ movl(rax, 0); - __ jccb(Assembler::equal, done); - __ decrementl(rax); - } - __ bind(done); +#ifdef _LP64 + ShouldNotReachHere(); #else - if (is_float) { - __ fld_s(at_rsp()); - } else { - __ fld_d(at_rsp()); - __ pop(rdx); + if (is_float) { + __ fld_s(at_rsp()); + } else { + __ fld_d(at_rsp()); + __ pop(rdx); + } + __ pop(rcx); + __ fcmp2int(rax, unordered_result < 0); +#endif // _LP64 } - __ pop(rcx); - __ fcmp2int(rax, unordered_result < 0); -#endif } void TemplateTable::branch(bool is_jsr, bool is_wide) { @@ -2747,8 +2846,7 @@ __ jcc(Assembler::notEqual, notFloat); // ftos - LP64_ONLY(__ movflt(xmm0, field)); - NOT_LP64(__ fld_s(field)); + __ load_float(field); __ push(ftos); // Rewrite bytecode to be faster if (!is_static && rc == may_rewrite) { @@ -2762,8 +2860,7 @@ __ jcc(Assembler::notEqual, notDouble); #endif // dtos - LP64_ONLY(__ movdbl(xmm0, 
field)); - NOT_LP64(__ fld_d(field)); + __ load_double(field); __ push(dtos); // Rewrite bytecode to be faster if (!is_static && rc == may_rewrite) { @@ -3045,8 +3142,7 @@ { __ pop(ftos); if (!is_static) pop_and_check_object(obj); - NOT_LP64( __ fstp_s(field);) - LP64_ONLY( __ movflt(field, xmm0);) + __ store_float(field); if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no); } @@ -3063,8 +3159,7 @@ { __ pop(dtos); if (!is_static) pop_and_check_object(obj); - NOT_LP64( __ fstp_d(field);) - LP64_ONLY( __ movdbl(field, xmm0);) + __ store_double(field); if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no); } @@ -3122,8 +3217,8 @@ case Bytecodes::_fast_sputfield: // fall through case Bytecodes::_fast_cputfield: // fall through case Bytecodes::_fast_iputfield: __ push_i(rax); break; - case Bytecodes::_fast_dputfield: __ push_d(); break; - case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_dputfield: __ push(dtos); break; + case Bytecodes::_fast_fputfield: __ push(ftos); break; case Bytecodes::_fast_lputfield: __ push_l(rax); break; default: @@ -3146,8 +3241,8 @@ case Bytecodes::_fast_sputfield: // fall through case Bytecodes::_fast_cputfield: // fall through case Bytecodes::_fast_iputfield: __ pop_i(rax); break; - case Bytecodes::_fast_dputfield: __ pop_d(); break; - case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_dputfield: __ pop(dtos); break; + case Bytecodes::_fast_fputfield: __ pop(ftos); break; case Bytecodes::_fast_lputfield: __ pop_l(rax); break; } __ bind(L2); @@ -3211,12 +3306,10 @@ __ movw(field, rax); break; case Bytecodes::_fast_fputfield: - NOT_LP64( __ fstp_s(field); ) - LP64_ONLY( __ movflt(field, xmm0);) + __ store_float(field); break; case Bytecodes::_fast_dputfield: - NOT_LP64( __ fstp_d(field); ) - LP64_ONLY( __ movdbl(field, xmm0);) + __ store_double(field); break; default: 
ShouldNotReachHere(); @@ -3301,12 +3394,10 @@ __ load_unsigned_short(rax, field); break; case Bytecodes::_fast_fgetfield: - LP64_ONLY(__ movflt(xmm0, field)); - NOT_LP64(__ fld_s(field)); + __ load_float(field); break; case Bytecodes::_fast_dgetfield: - LP64_ONLY(__ movdbl(xmm0, field)); - NOT_LP64(__ fld_d(field)); + __ load_double(field); break; default: ShouldNotReachHere(); @@ -3346,8 +3437,7 @@ __ verify_oop(rax); break; case ftos: - LP64_ONLY(__ movflt(xmm0, field)); - NOT_LP64(__ fld_s(field)); + __ load_float(field); break; default: ShouldNotReachHere(); --- old/src/share/vm/compiler/compileBroker.cpp 2015-08-07 15:41:38.202585009 +0200 +++ new/src/share/vm/compiler/compileBroker.cpp 2015-08-07 15:41:37.922585022 +0200 @@ -1399,6 +1399,28 @@ // do the compilation if (method->is_native()) { if (!PreferInterpreterNativeStubs || method->is_method_handle_intrinsic()) { + // The following native methods: + // + // java.lang.Float.intBitsToFloat + // java.lang.Float.floatToRawIntBits + // java.lang.Double.longBitsToDouble + // java.lang.Double.doubleToRawLongBits + // + // are called through the interpreter even if interpreter native stubs + // are not preferred (i.e., calling through adapter handlers is preferred). + // The reason is that on x86_32 signaling NaNs (sNaNs) are not preserved + // if the version of the methods from the native libraries is called. + // As the interpreter and the C2-intrinsified versions of the methods preserve + // sNaNs, that would result in inconsistent handling of sNaNs.
+ if ((UseSSE >= 1 && + (method->intrinsic_id() == vmIntrinsics::_intBitsToFloat || + method->intrinsic_id() == vmIntrinsics::_floatToRawIntBits)) || + (UseSSE >= 2 && + (method->intrinsic_id() == vmIntrinsics::_longBitsToDouble || + method->intrinsic_id() == vmIntrinsics::_doubleToRawLongBits))) { + return NULL; + } + // To properly handle the appendix argument for out-of-line calls we are using a small trampoline that // pops off the appendix argument and jumps to the target (see gen_special_dispatch in SharedRuntime). // --- old/src/cpu/x86/vm/interp_masm_x86.hpp 2015-08-07 15:41:38.286585005 +0200 +++ new/src/cpu/x86/vm/interp_masm_x86.hpp 2015-08-07 15:41:38.066585015 +0200 @@ -140,20 +140,20 @@ void push_ptr(Register r = rax); void push_i(Register r = rax); + void push_f(XMMRegister r); + void pop_f(XMMRegister r); + void pop_d(XMMRegister r); + void push_d(XMMRegister r); #ifdef _LP64 void pop_l(Register r = rax); - void pop_f(XMMRegister r = xmm0); - void pop_d(XMMRegister r = xmm0); void push_l(Register r = rax); - void push_f(XMMRegister r = xmm0); - void push_d(XMMRegister r = xmm0); #else void pop_l(Register lo = rax, Register hi = rdx); void pop_f(); void pop_d(); void push_l(Register lo = rax, Register hi = rdx); - void push_d(Register r = rax); + void push_d(); void push_f(); #endif // _LP64 --- old/src/cpu/x86/vm/macroAssembler_x86.hpp 2015-08-07 15:41:38.286585005 +0200 +++ new/src/cpu/x86/vm/macroAssembler_x86.hpp 2015-08-07 15:41:38.022585017 +0200 @@ -470,6 +470,22 @@ // Pop ST (ffree & fincstp combined) void fpop(); + // Load float value from 'address'. If UseSSE >= 1, the value is loaded into + // register xmm0. Otherwise, the value is loaded onto the FPU stack. + void load_float(Address src); + + // Store float value to 'address'. If UseSSE >= 1, the value is stored + // from register xmm0. Otherwise, the value is stored from the FPU stack. + void store_float(Address dst); + + // Load double value from 'address'. 
If UseSSE >= 2, the value is loaded into + // register xmm0. Otherwise, the value is loaded onto the FPU stack. + void load_double(Address src); + + // Store double value to 'address'. If UseSSE >= 2, the value is stored + // from register xmm0. Otherwise, the value is stored from the FPU stack. + void store_double(Address dst); + // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack void push_fTOS(); --- old/src/cpu/x86/vm/assembler_x86.cpp 2015-08-07 15:41:38.286585005 +0200 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2015-08-07 15:41:38.014585018 +0200 @@ -1674,6 +1674,13 @@ emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true); } +void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true); + emit_int8(0x2A); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); @@ -6604,13 +6611,6 @@ emit_operand(dst, src); } -void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true); - emit_int8(0x2A); - emit_int8((unsigned char)(0xC0 | encode)); -} - void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); if (VM_Version::supports_evex()) { --- old/src/cpu/x86/vm/interp_masm_x86.cpp 2015-08-07 15:41:38.330585003 +0200 +++ new/src/cpu/x86/vm/interp_masm_x86.cpp 2015-08-07 15:41:38.126585012 +0200 @@ -355,8 +355,8 @@ case ctos: // fall through case stos: // fall through case itos: movl(rax, val_addr); break; - case ftos: movflt(xmm0, val_addr); break; - case dtos: movdbl(xmm0, val_addr); break; + case ftos: load_float(val_addr); break; + case dtos: load_double(val_addr); break; case vtos: /* nothing to do */ 
break; default : ShouldNotReachHere(); } @@ -376,8 +376,8 @@ case ctos: // fall through case stos: // fall through case itos: movl(rax, val_addr); break; - case ftos: fld_s(val_addr); break; - case dtos: fld_d(val_addr); break; + case ftos: load_float(val_addr); break; + case dtos: load_double(val_addr); break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -578,16 +578,9 @@ push(r); } -#ifdef _LP64 -void InterpreterMacroAssembler::pop_i(Register r) { - // XXX can't use pop currently, upper half non clean - movl(r, Address(rsp, 0)); - addptr(rsp, wordSize); -} - -void InterpreterMacroAssembler::pop_l(Register r) { - movq(r, Address(rsp, 0)); - addptr(rsp, 2 * Interpreter::stackElementSize); +void InterpreterMacroAssembler::push_f(XMMRegister r) { + subptr(rsp, wordSize); + movflt(Address(rsp, 0), r); } void InterpreterMacroAssembler::pop_f(XMMRegister r) { @@ -595,24 +588,31 @@ addptr(rsp, wordSize); } +void InterpreterMacroAssembler::push_d(XMMRegister r) { + subptr(rsp, 2 * wordSize); + movdbl(Address(rsp, 0), r); +} + void InterpreterMacroAssembler::pop_d(XMMRegister r) { movdbl(r, Address(rsp, 0)); addptr(rsp, 2 * Interpreter::stackElementSize); } -void InterpreterMacroAssembler::push_l(Register r) { - subptr(rsp, 2 * wordSize); - movq(Address(rsp, 0), r); +#ifdef _LP64 +void InterpreterMacroAssembler::pop_i(Register r) { + // XXX can't use pop currently, upper half non clean + movl(r, Address(rsp, 0)); + addptr(rsp, wordSize); } -void InterpreterMacroAssembler::push_f(XMMRegister r) { - subptr(rsp, wordSize); - movflt(Address(rsp, 0), r); +void InterpreterMacroAssembler::pop_l(Register r) { + movq(r, Address(rsp, 0)); + addptr(rsp, 2 * Interpreter::stackElementSize); } -void InterpreterMacroAssembler::push_d(XMMRegister r) { +void InterpreterMacroAssembler::push_l(Register r) { subptr(rsp, 2 * wordSize); - movdbl(Address(rsp, 0), r); + movq(Address(rsp, 0), r); } void InterpreterMacroAssembler::pop(TosState state) { @@ -623,8 +623,8 
@@ case stos: case itos: pop_i(); break; case ltos: pop_l(); break; - case ftos: pop_f(); break; - case dtos: pop_d(); break; + case ftos: pop_f(xmm0); break; + case dtos: pop_d(xmm0); break; case vtos: /* nothing to do */ break; default: ShouldNotReachHere(); } @@ -640,8 +640,8 @@ case stos: case itos: push_i(); break; case ltos: push_l(); break; - case ftos: push_f(); break; - case dtos: push_d(); break; + case ftos: push_f(xmm0); break; + case dtos: push_d(xmm0); break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -675,8 +675,20 @@ case stos: // fall through case itos: pop_i(rax); break; case ltos: pop_l(rax, rdx); break; - case ftos: pop_f(); break; - case dtos: pop_d(); break; + case ftos: + if (UseSSE >= 1) { + pop_f(xmm0); + } else { + pop_f(); + } + break; + case dtos: + if (UseSSE >= 2) { + pop_d(xmm0); + } else { + pop_d(); + } + break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -695,7 +707,7 @@ fstp_s(Address(rsp, 0)); } -void InterpreterMacroAssembler::push_d(Register r) { +void InterpreterMacroAssembler::push_d() { // Do not schedule for no AGI! Never write beyond rsp! 
subptr(rsp, 2 * wordSize); fstp_d(Address(rsp, 0)); @@ -711,8 +723,20 @@ case stos: // fall through case itos: push_i(rax); break; case ltos: push_l(rax, rdx); break; - case ftos: push_f(); break; - case dtos: push_d(rax); break; + case ftos: + if (UseSSE >= 1) { + push_f(xmm0); + } else { + push_f(); + } + break; + case dtos: + if (UseSSE >= 2) { + push_d(xmm0); + } else { + push_d(); + } + break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -995,22 +1019,6 @@ leave(); // remove frame anchor pop(ret_addr); // get return address mov(rsp, rbx); // set sp to sender sp -#ifndef _LP64 - if (UseSSE) { - // float and double are returned in xmm register in SSE-mode - if (state == ftos && UseSSE >= 1) { - subptr(rsp, wordSize); - fstp_s(Address(rsp, 0)); - movflt(xmm0, Address(rsp, 0)); - addptr(rsp, wordSize); - } else if (state == dtos && UseSSE >= 2) { - subptr(rsp, 2*wordSize); - fstp_d(Address(rsp, 0)); - movdbl(xmm0, Address(rsp, 0)); - addptr(rsp, 2*wordSize); - } - } -#endif // _LP64 } #endif // !CC_INTERP @@ -1783,7 +1791,10 @@ void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { #ifndef _LP64 - if (state == ftos || state == dtos) MacroAssembler::verify_FPU(stack_depth); + if ((state == ftos && UseSSE < 1) || + (state == dtos && UseSSE < 2)) { + MacroAssembler::verify_FPU(stack_depth); + } #endif } --- old/src/cpu/x86/vm/interpreterGenerator_x86.hpp 2015-08-07 15:41:38.330585003 +0200 +++ new/src/cpu/x86/vm/interpreterGenerator_x86.hpp 2015-08-07 15:41:38.106585013 +0200 @@ -42,6 +42,12 @@ address generate_Reference_get_entry(); address generate_CRC32_update_entry(); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); +#ifndef _LP64 + address generate_Float_intBitsToFloat_entry(); + address generate_Float_floatToRawIntBits_entry(); + address generate_Double_longBitsToDouble_entry(); + address generate_Double_doubleToRawLongBits_entry(); +#endif void lock_method(void); void 
generate_stack_overflow_check(void); --- old/src/cpu/x86/vm/templateInterpreter_x86_64.cpp 2015-08-07 15:41:38.314585004 +0200 +++ new/src/cpu/x86/vm/templateInterpreter_x86_64.cpp 2015-08-07 15:41:38.042585016 +0200 @@ -1707,10 +1707,10 @@ address& vep) { assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); Label L; - aep = __ pc(); __ push_ptr(); __ jmp(L); - fep = __ pc(); __ push_f(); __ jmp(L); - dep = __ pc(); __ push_d(); __ jmp(L); - lep = __ pc(); __ push_l(); __ jmp(L); + aep = __ pc(); __ push_ptr(); __ jmp(L); + fep = __ pc(); __ push_f(xmm0); __ jmp(L); + dep = __ pc(); __ push_d(xmm0); __ jmp(L); + lep = __ pc(); __ push_l(); __ jmp(L); bep = cep = sep = iep = __ pc(); __ push_i(); vep = __ pc(); --- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-08-07 15:41:38.342585002 +0200 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-08-07 15:41:38.110585013 +0200 @@ -3310,6 +3310,42 @@ fincstp(); } +void MacroAssembler::load_float(Address src) { + if (UseSSE >= 1) { + movflt(xmm0, src); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(fld_s(src)); + } +} + +void MacroAssembler::store_float(Address dst) { + if (UseSSE >= 1) { + movflt(dst, xmm0); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(fstp_s(dst)); + } +} + +void MacroAssembler::load_double(Address src) { + if (UseSSE >= 2) { + movdbl(xmm0, src); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(fld_d(src)); + } +} + +void MacroAssembler::store_double(Address dst) { + if (UseSSE >= 2) { + movdbl(dst, xmm0); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(fstp_d(dst)); + } +} + void MacroAssembler::fremr(Register tmp) { save_rax(tmp); { Label L; --- old/src/share/vm/interpreter/abstractInterpreter.hpp 2015-08-07 15:41:38.366585001 +0200 +++ new/src/share/vm/interpreter/abstractInterpreter.hpp 2015-08-07 15:41:38.166585011 +0200 @@ -90,6 +90,10 @@ java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update() java_util_zip_CRC32_updateBytes, 
// implementation of java.util.zip.CRC32.updateBytes() java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer() + java_lang_Float_intBitsToFloat, // implementation of java.lang.Float.intBitsToFloat() + java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits() + java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble() + java_lang_Double_doubleToRawLongBits, // implementation of java.lang.Double.doubleToRawLongBits() number_of_method_entries, invalid = -1 }; --- old/src/cpu/x86/vm/templateInterpreter_x86_32.cpp 2015-08-07 15:41:38.382585000 +0200 +++ new/src/cpu/x86/vm/templateInterpreter_x86_32.cpp 2015-08-07 15:41:38.118585013 +0200 @@ -170,22 +170,12 @@ __ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled"); } - // In SSE mode, interpreter returns FP results in xmm0 but they need - // to end up back on the FPU so it can operate on them. - if (state == ftos && UseSSE >= 1) { - __ subptr(rsp, wordSize); - __ movflt(Address(rsp, 0), xmm0); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, wordSize); - } else if (state == dtos && UseSSE >= 2) { - __ subptr(rsp, 2*wordSize); - __ movdbl(Address(rsp, 0), xmm0); - __ fld_d(Address(rsp, 0)); - __ addptr(rsp, 2*wordSize); + if (state == ftos) { + __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_return_entry_for in interpreter"); + } else if (state == dtos) { + __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_return_entry_for in interpreter"); } - __ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 
1 : 0, "generate_return_entry_for in interpreter"); - // Restore stack bottom in case i2c adjusted stack __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); // and NULL it as marker that rsp is now tos until next java call @@ -217,21 +207,12 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { address entry = __ pc(); - // In SSE mode, FP results are in xmm0 - if (state == ftos && UseSSE > 0) { - __ subptr(rsp, wordSize); - __ movflt(Address(rsp, 0), xmm0); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, wordSize); - } else if (state == dtos && UseSSE >= 2) { - __ subptr(rsp, 2*wordSize); - __ movdbl(Address(rsp, 0), xmm0); - __ fld_d(Address(rsp, 0)); - __ addptr(rsp, 2*wordSize); + if (state == ftos) { + __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_deopt_entry_for in interpreter"); + } else if (state == dtos) { + __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_deopt_entry_for in interpreter"); } - __ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 1 : 0, "generate_deopt_entry_for in interpreter"); - // The stack is not extended by deopt but we must NULL last_sp as this // entry is like a "return". __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); @@ -735,7 +716,7 @@ if (UseCRC32Intrinsics) { address entry = __ pc(); - // rbx,: Method* + // rbx: Method* // rsi: senderSP must preserved for slow path, set SP to it on fast path // rdx: scratch // rdi: scratch @@ -841,6 +822,124 @@ return generate_native_entry(false); } +/** + * Method entry for static native method: + * java.lang.Float.intBitsToFloat(int bits) + */ +address InterpreterGenerator::generate_Float_intBitsToFloat_entry() { + address entry; + + if (UseSSE >= 1) { + entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). 
+ + // Load 'bits' into xmm0 (interpreter returns results in xmm0) + __ movflt(xmm0, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + } else { + entry = generate_native_entry(false); + } + + return entry; +} + +/** + * Method entry for static native method: + * java.lang.Float.floatToRawIntBits(float value) + */ +address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() { + address entry; + + if (UseSSE >= 1) { + entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load the parameter (a floating-point value) into rax. + __ movl(rax, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + } else { + entry = generate_native_entry(false); + } + + return entry; +} + + +/** + * Method entry for static native method: + * java.lang.Double.longBitsToDouble(long bits) + */ +address InterpreterGenerator::generate_Double_longBitsToDouble_entry() { + address entry; + + if (UseSSE >= 2) { + entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). 
+ + // Load 'bits' into xmm0 (interpreter returns results in xmm0) + __ movdbl(xmm0, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + } else { + entry = generate_native_entry(false); + } + + return entry; +} + +/** + * Method entry for static native method: + * java.lang.Double.doubleToRawLongBits(double value) + */ +address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { + address entry; + + if (UseSSE >= 2) { + entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load the parameter (a floating-point value) into rax. + __ movl(rdx, Address(rsp, 2*wordSize)); + __ movl(rax, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + } else { + entry = generate_native_entry(false); + } + + return entry; +} + // // Interpreter stub for calling a native method. (asm interpreter) // This sets up a somewhat different looking stack for calling the native method @@ -1090,7 +1189,7 @@ double_handler.addr()); __ jcc(Assembler::notEqual, L); __ bind(push_double); - __ push(dtos); + __ push_d(); // FP values are returned using the FPU, so push FPU contents (even if UseSSE > 0). 
__ bind(L); } __ push(ltos); --- old/src/share/vm/interpreter/templateInterpreter.cpp 2015-08-07 15:41:38.662584987 +0200 +++ new/src/share/vm/interpreter/templateInterpreter.cpp 2015-08-07 15:41:38.606584990 +0200 @@ -397,34 +397,39 @@ // all non-native method kinds method_entry(zerolocals) - method_entry(zerolocals_synchronized) - method_entry(empty) - method_entry(accessor) - method_entry(abstract) - method_entry(java_lang_math_sin ) - method_entry(java_lang_math_cos ) - method_entry(java_lang_math_tan ) - method_entry(java_lang_math_abs ) - method_entry(java_lang_math_sqrt ) - method_entry(java_lang_math_log ) - method_entry(java_lang_math_log10) - method_entry(java_lang_math_exp ) - method_entry(java_lang_math_pow ) - method_entry(java_lang_ref_reference_get) - - if (UseCRC32Intrinsics) { - method_entry(java_util_zip_CRC32_update) - method_entry(java_util_zip_CRC32_updateBytes) - method_entry(java_util_zip_CRC32_updateByteBuffer) - } + method_entry(zerolocals_synchronized) + method_entry(empty) + method_entry(accessor) + method_entry(abstract) + method_entry(java_lang_math_sin ) + method_entry(java_lang_math_cos ) + method_entry(java_lang_math_tan ) + method_entry(java_lang_math_abs ) + method_entry(java_lang_math_sqrt ) + method_entry(java_lang_math_log ) + method_entry(java_lang_math_log10) + method_entry(java_lang_math_exp ) + method_entry(java_lang_math_pow ) + method_entry(java_lang_ref_reference_get) + + if (UseCRC32Intrinsics) { + method_entry(java_util_zip_CRC32_update) + method_entry(java_util_zip_CRC32_updateBytes) + method_entry(java_util_zip_CRC32_updateByteBuffer) + } + + method_entry(java_lang_Float_intBitsToFloat); + method_entry(java_lang_Float_floatToRawIntBits); + method_entry(java_lang_Double_longBitsToDouble); + method_entry(java_lang_Double_doubleToRawLongBits); initialize_method_handle_entries(); // all native method kinds (must be one contiguous block) Interpreter::_native_entry_begin = Interpreter::code()->code_end(); 
method_entry(native) - method_entry(native_synchronized) - Interpreter::_native_entry_end = Interpreter::code()->code_end(); + method_entry(native_synchronized) + Interpreter::_native_entry_end = Interpreter::code()->code_end(); #undef method_entry --- /dev/null 2015-08-03 09:28:11.732350737 +0200 +++ new/test/compiler/floatingpoint/NaNTest.java 2015-08-07 15:41:39.786584935 +0200 @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +/** + * @test + * @bug 8076373 + * @summary Verify if signaling NaNs are preserved. 
+ * @run main NaNTest + */ +public class NaNTest { + static void testFloat() { + int originalValue = 0x7f800001; + int readBackValue = Float.floatToRawIntBits(Float.intBitsToFloat(originalValue)); + if (originalValue != readBackValue) { + String errorMessage = String.format("Original and read back float values mismatch\n0x%X 0x%X\n", + originalValue, + readBackValue); + throw new RuntimeException(errorMessage); + } else { + System.out.printf("Written and read back float values match\n0x%X 0x%X\n", + originalValue, + readBackValue); + } + } + + static void testDouble() { + long originalValue = 0xFFF0000000000001L; + long readBackValue = Double.doubleToRawLongBits(Double.longBitsToDouble(originalValue)); + if (originalValue != readBackValue) { + String errorMessage = String.format("Original and read back double values mismatch\n0x%X 0x%X\n", + originalValue, + readBackValue); + throw new RuntimeException(errorMessage); + } else { + System.out.printf("Written and read back double values match\n0x%X 0x%X\n", + originalValue, + readBackValue); + } + + } + + public static void main(String args[]) { + System.out.println("### NanTest started"); + + testFloat(); + testDouble(); + + System.out.println("### NanTest ended"); + } +}