// Review notes. This is a free-form preamble placed ahead of the "---" file
// header; nothing inside the hunk below has been altered.
//
//  * Target file: src/cpu/x86/vm/macroAssembler_x86.cpp. The single hunk header
//    reads "-5629,233 +5629,8", i.e. 233 old lines become 8 new lines.
//  * Removed ('-' lines): MacroAssembler::fp_runtime_fallback(runtime_entry,
//    nb_args, num_fpu_regs_in_use), the file-static constant pi_4, and
//    MacroAssembler::trigfunc(trig, num_fpu_regs_in_use).
//  * Added ('+' line): only the signature of
//    MacroAssembler::mathfunc(address runtime_entry). Its body is the one
//    statement kept as context from the old fallback,
//    MacroAssembler::call_VM_leaf_base(runtime_entry, 0), and it is closed by
//    the context '}' near the end of the hunk.
//  * Everything the removed fallback wrapped around that call goes away with it:
//    pusha()/popa(), the XMM spill/reload (plus the upper YMM/ZMM halves under
//    COMPILER2 when MaxVectorSize > 16), the x87 stack spill/reload through
//    fstp_d/fld_d, and the _LP64-only transfer of x87 arguments into xmm0/xmm1
//    and of the xmm0 result back onto the x87 stack.
//    NOTE(review): presumably the callers of mathfunc now take care of argument
//    placement and live-register preservation themselves -- confirm against the
//    call sites, which are not part of this hunk.
//  * The removed trigfunc had an inline fast path only for trig == 't' (ftan,
//    taken when pi_4 is reachable and |x| is not above pi/4); 's' and 'c'
//    always went to the runtime call.
//  * NOTE(review): the unified-diff line breaks appear to have been lost in this
//    copy (file header, hunk header and hunk lines share physical lines); they
//    would need to be restored before the patch can be applied.
--- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2016-05-18 14:03:53.640118400 -0700 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2016-05-18 14:03:53.163118400 -0700 @@ -5629,233 +5629,8 @@ #endif } -void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { - pusha(); - - // if we are coming from c1, xmm registers may be live - int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8); - if (UseAVX > 2) { - num_xmm_regs = LP64_ONLY(32) NOT_LP64(8); - } - - if (UseSSE == 1) { - subptr(rsp, sizeof(jdouble)*8); - for (int n = 0; n < 8; n++) { - movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n)); - } - } else if (UseSSE >= 2) { - if (UseAVX > 2) { - push(rbx); - movl(rbx, 0xffff); - kmovwl(k1, rbx); - pop(rbx); - } -#ifdef COMPILER2 - if (MaxVectorSize > 16) { - if(UseAVX > 2) { - // Save upper half of ZMM registers - subptr(rsp, 32*num_xmm_regs); - for (int n = 0; n < num_xmm_regs; n++) { - vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n)); - } - } - assert(UseAVX > 0, "256 bit vectors are supported only with AVX"); - // Save upper half of YMM registers - subptr(rsp, 16*num_xmm_regs); - for (int n = 0; n < num_xmm_regs; n++) { - vextractf128_high(Address(rsp, n*16), as_XMMRegister(n)); - } - } -#endif - // Save whole 128bit (16 bytes) XMM registers - subptr(rsp, 16*num_xmm_regs); -#ifdef _LP64 - if (VM_Version::supports_evex()) { - for (int n = 0; n < num_xmm_regs; n++) { - vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0); - } - } else { - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(Address(rsp, n*16), as_XMMRegister(n)); - } - } -#else - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(Address(rsp, n*16), as_XMMRegister(n)); - } -#endif - } - - // Preserve registers across runtime call - int incoming_argument_and_return_value_offset = -1; - if (num_fpu_regs_in_use > 1) { - // Must preserve all other FPU regs (could alternatively convert - // SharedRuntime::dsin, dcos etc. 
into assembly routines known not to trash - // FPU state, but can not trust C compiler) - NEEDS_CLEANUP; - // NOTE that in this case we also push the incoming argument(s) to - // the stack and restore it later; we also use this stack slot to - // hold the return value from dsin, dcos etc. - for (int i = 0; i < num_fpu_regs_in_use; i++) { - subptr(rsp, sizeof(jdouble)); - fstp_d(Address(rsp, 0)); - } - incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); - for (int i = nb_args-1; i >= 0; i--) { - fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); - } - } - - subptr(rsp, nb_args*sizeof(jdouble)); - for (int i = 0; i < nb_args; i++) { - fstp_d(Address(rsp, i*sizeof(jdouble))); - } - -#ifdef _LP64 - if (nb_args > 0) { - movdbl(xmm0, Address(rsp, 0)); - } - if (nb_args > 1) { - movdbl(xmm1, Address(rsp, sizeof(jdouble))); - } - assert(nb_args <= 2, "unsupported number of args"); -#endif // _LP64 - - // NOTE: we must not use call_VM_leaf here because that requires a - // complete interpreter frame in debug mode -- same bug as 4387334 - // MacroAssembler::call_VM_leaf_base is perfectly safe and will - // do proper 64bit abi - - NEEDS_CLEANUP; - // Need to add stack banging before this runtime call if it needs to - // be taken; however, there is no generic stack banging routine at - // the MacroAssembler level - +void MacroAssembler::mathfunc(address runtime_entry) { MacroAssembler::call_VM_leaf_base(runtime_entry, 0); - -#ifdef _LP64 - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); -#endif // _LP64 - addptr(rsp, sizeof(jdouble)*nb_args); - if (num_fpu_regs_in_use > 1) { - // Must save return value to stack and then restore entire FPU - // stack except incoming arguments - fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); - for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { - fld_d(Address(rsp, 0)); - addptr(rsp, sizeof(jdouble)); - } - fld_d(Address(rsp, 
(nb_args-1)*sizeof(jdouble))); - addptr(rsp, sizeof(jdouble)*nb_args); - } - - if (UseSSE == 1) { - for (int n = 0; n < 8; n++) { - movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble))); - } - addptr(rsp, sizeof(jdouble)*8); - } else if (UseSSE >= 2) { - // Restore whole 128bit (16 bytes) XMM registers -#ifdef _LP64 - if (VM_Version::supports_evex()) { - for (int n = 0; n < num_xmm_regs; n++) { - vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0); - } - } else { - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(as_XMMRegister(n), Address(rsp, n*16)); - } - } -#else - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(as_XMMRegister(n), Address(rsp, n*16)); - } -#endif - addptr(rsp, 16*num_xmm_regs); - -#ifdef COMPILER2 - if (MaxVectorSize > 16) { - // Restore upper half of YMM registers. - for (int n = 0; n < num_xmm_regs; n++) { - vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16)); - } - addptr(rsp, 16*num_xmm_regs); - if(UseAVX > 2) { - for (int n = 0; n < num_xmm_regs; n++) { - vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32)); - } - addptr(rsp, 32*num_xmm_regs); - } - } -#endif - } - popa(); -} - -static const double pi_4 = 0.7853981633974483; - -void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { - // A hand-coded argument reduction for values in fabs(pi/4, pi/2) - // was attempted in this code; unfortunately it appears that the - // switch to 80-bit precision and back causes this to be - // unprofitable compared with simply performing a runtime call if - // the argument is out of the (-pi/4, pi/4) range. 
- - Register tmp = noreg; - if (!VM_Version::supports_cmov()) { - // fcmp needs a temporary so preserve rbx, - tmp = rbx; - push(tmp); - } - - Label slow_case, done; - if (trig == 't') { - ExternalAddress pi4_adr = (address)&pi_4; - if (reachable(pi4_adr)) { - // x ?<= pi/4 - fld_d(pi4_adr); - fld_s(1); // Stack: X PI/4 X - fabs(); // Stack: |X| PI/4 X - fcmp(tmp); - jcc(Assembler::above, slow_case); - - // fastest case: -pi/4 <= x <= pi/4 - ftan(); - - jmp(done); - } - } - // slow case: runtime call - bind(slow_case); - - switch(trig) { - case 's': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); - } - break; - case 'c': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); - } - break; - case 't': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); - } - break; - default: - assert(false, "bad intrinsic"); - break; - } - - // Come here with result in F-TOS - bind(done); - - if (tmp != noreg) { - pop(tmp); - } } // Look up the method for a megamorphic invokeinterface call.