< prev index next >

src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page

        

*** 5619,5855 **** } adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); #endif } ! void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { ! pusha(); ! ! // if we are coming from c1, xmm registers may be live ! int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8); ! if (UseAVX > 2) { ! num_xmm_regs = LP64_ONLY(32) NOT_LP64(8); ! } ! ! if (UseSSE == 1) { ! subptr(rsp, sizeof(jdouble)*8); ! for (int n = 0; n < 8; n++) { ! movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n)); ! } ! } else if (UseSSE >= 2) { ! if (UseAVX > 2) { ! push(rbx); ! movl(rbx, 0xffff); ! kmovwl(k1, rbx); ! pop(rbx); ! } ! #ifdef COMPILER2 ! if (MaxVectorSize > 16) { ! if(UseAVX > 2) { ! // Save upper half of ZMM registers ! subptr(rsp, 32*num_xmm_regs); ! for (int n = 0; n < num_xmm_regs; n++) { ! vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n)); ! } ! } ! assert(UseAVX > 0, "256 bit vectors are supported only with AVX"); ! // Save upper half of YMM registers ! subptr(rsp, 16*num_xmm_regs); ! for (int n = 0; n < num_xmm_regs; n++) { ! vextractf128_high(Address(rsp, n*16), as_XMMRegister(n)); ! } ! } ! #endif ! // Save whole 128bit (16 bytes) XMM registers ! subptr(rsp, 16*num_xmm_regs); ! #ifdef _LP64 ! if (VM_Version::supports_evex()) { ! for (int n = 0; n < num_xmm_regs; n++) { ! vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0); ! } ! } else { ! for (int n = 0; n < num_xmm_regs; n++) { ! movdqu(Address(rsp, n*16), as_XMMRegister(n)); ! } ! } ! #else ! for (int n = 0; n < num_xmm_regs; n++) { ! movdqu(Address(rsp, n*16), as_XMMRegister(n)); ! } ! #endif ! } ! ! // Preserve registers across runtime call ! int incoming_argument_and_return_value_offset = -1; ! if (num_fpu_regs_in_use > 1) { ! // Must preserve all other FPU regs (could alternatively convert ! // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash ! // FPU state, but can not trust C compiler) ! NEEDS_CLEANUP; ! // NOTE that in this case we also push the incoming argument(s) to ! // the stack and restore it later; we also use this stack slot to ! // hold the return value from dsin, dcos etc. ! for (int i = 0; i < num_fpu_regs_in_use; i++) { ! subptr(rsp, sizeof(jdouble)); ! fstp_d(Address(rsp, 0)); ! } ! incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); ! for (int i = nb_args-1; i >= 0; i--) { ! fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); ! } ! } ! ! subptr(rsp, nb_args*sizeof(jdouble)); ! for (int i = 0; i < nb_args; i++) { ! fstp_d(Address(rsp, i*sizeof(jdouble))); ! } ! ! #ifdef _LP64 ! if (nb_args > 0) { ! movdbl(xmm0, Address(rsp, 0)); ! } ! if (nb_args > 1) { ! movdbl(xmm1, Address(rsp, sizeof(jdouble))); ! } ! assert(nb_args <= 2, "unsupported number of args"); ! #endif // _LP64 ! ! // NOTE: we must not use call_VM_leaf here because that requires a ! // complete interpreter frame in debug mode -- same bug as 4387334 ! // MacroAssembler::call_VM_leaf_base is perfectly safe and will ! // do proper 64bit abi ! ! NEEDS_CLEANUP; ! // Need to add stack banging before this runtime call if it needs to ! // be taken; however, there is no generic stack banging routine at ! // the MacroAssembler level ! MacroAssembler::call_VM_leaf_base(runtime_entry, 0); - - #ifdef _LP64 - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - #endif // _LP64 - addptr(rsp, sizeof(jdouble)*nb_args); - if (num_fpu_regs_in_use > 1) { - // Must save return value to stack and then restore entire FPU - // stack except incoming arguments - fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); - for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { - fld_d(Address(rsp, 0)); - addptr(rsp, sizeof(jdouble)); - } - fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); - addptr(rsp, sizeof(jdouble)*nb_args); - } - - if (UseSSE == 1) { - for (int n = 0; n < 8; n++) { - movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble))); - } - addptr(rsp, sizeof(jdouble)*8); - } else if (UseSSE >= 2) { - // Restore whole 128bit (16 bytes) XMM registers - #ifdef _LP64 - if (VM_Version::supports_evex()) { - for (int n = 0; n < num_xmm_regs; n++) { - vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0); - } - } else { - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(as_XMMRegister(n), Address(rsp, n*16)); - } - } - #else - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(as_XMMRegister(n), Address(rsp, n*16)); - } - #endif - addptr(rsp, 16*num_xmm_regs); - - #ifdef COMPILER2 - if (MaxVectorSize > 16) { - // Restore upper half of YMM registers. - for (int n = 0; n < num_xmm_regs; n++) { - vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16)); - } - addptr(rsp, 16*num_xmm_regs); - if(UseAVX > 2) { - for (int n = 0; n < num_xmm_regs; n++) { - vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32)); - } - addptr(rsp, 32*num_xmm_regs); - } - } - #endif - } - popa(); - } - - static const double pi_4 = 0.7853981633974483; - - void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { - // A hand-coded argument reduction for values in fabs(pi/4, pi/2) - // was attempted in this code; unfortunately it appears that the - // switch to 80-bit precision and back causes this to be - // unprofitable compared with simply performing a runtime call if - // the argument is out of the (-pi/4, pi/4) range. - - Register tmp = noreg; - if (!VM_Version::supports_cmov()) { - // fcmp needs a temporary so preserve rbx, - tmp = rbx; - push(tmp); - } - - Label slow_case, done; - if (trig == 't') { - ExternalAddress pi4_adr = (address)&pi_4; - if (reachable(pi4_adr)) { - // x ?<= pi/4 - fld_d(pi4_adr); - fld_s(1); // Stack: X PI/4 X - fabs(); // Stack: |X| PI/4 X - fcmp(tmp); - jcc(Assembler::above, slow_case); - - // fastest case: -pi/4 <= x <= pi/4 - ftan(); - - jmp(done); - } - } - // slow case: runtime call - bind(slow_case); - - switch(trig) { - case 's': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); - } - break; - case 'c': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); - } - break; - case 't': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); - } - break; - default: - assert(false, "bad intrinsic"); - break; - } - - // Come here with result in F-TOS - bind(done); - - if (tmp != noreg) { - pop(tmp); - } } // Look up the method for a megamorphic invokeinterface call. // The target method is determined by <intf_klass, itable_index>. // The receiver klass is in recv_klass. --- 5619,5630 ---- } adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); #endif } ! void MacroAssembler::mathfunc(address runtime_entry) { MacroAssembler::call_VM_leaf_base(runtime_entry, 0); } // Look up the method for a megamorphic invokeinterface call. // The target method is determined by <intf_klass, itable_index>. // The receiver klass is in recv_klass.
< prev index next >