< prev index next >
src/cpu/x86/vm/macroAssembler_x86.cpp
Print this page
*** 5619,5855 ****
}
adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}
! void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { // Emit a call to runtime_entry with nb_args double argument(s) taken from the x87 FPU stack; everything saved below is restored after the call.
! pusha(); // paired with popa() at the end of this function
! // nb_args: number of double arguments (at most 2 on 64-bit, see the assert below); num_fpu_regs_in_use: how many FPU stack values are spilled around the call.
! // if we are coming from c1, xmm registers may be live
! int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8); // 16 on 64-bit builds, 8 on 32-bit builds
! if (UseAVX > 2) {
! num_xmm_regs = LP64_ONLY(32) NOT_LP64(8); // 32 on 64-bit builds when UseAVX > 2
! }
!
! if (UseSSE == 1) { // 8 registers, one jdouble-sized slot each, written with movflt
! subptr(rsp, sizeof(jdouble)*8);
! for (int n = 0; n < 8; n++) {
! movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
! }
! } else if (UseSSE >= 2) {
! if (UseAVX > 2) { // load k1 with 0xffff, using rbx as scratch and preserving its value
! push(rbx);
! movl(rbx, 0xffff);
! kmovwl(k1, rbx);
! pop(rbx);
! }
! #ifdef COMPILER2
! if (MaxVectorSize > 16) { // vectors wider than 128 bits: the upper parts are saved separately
! if(UseAVX > 2) {
! // Save upper half of ZMM registers
! subptr(rsp, 32*num_xmm_regs); // 32 bytes per register
! for (int n = 0; n < num_xmm_regs; n++) {
! vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
! }
! }
! assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
! // Save upper half of YMM registers
! subptr(rsp, 16*num_xmm_regs); // 16 bytes per register
! for (int n = 0; n < num_xmm_regs; n++) {
! vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
! }
! }
! #endif
! // Save whole 128bit (16 bytes) XMM registers
! subptr(rsp, 16*num_xmm_regs);
! #ifdef _LP64
! if (VM_Version::supports_evex()) { // EVEX-capable CPUs store via vextractf32x4 (last operand 0) instead of movdqu
! for (int n = 0; n < num_xmm_regs; n++) {
! vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
! }
! } else {
! for (int n = 0; n < num_xmm_regs; n++) {
! movdqu(Address(rsp, n*16), as_XMMRegister(n));
! }
! }
! #else
! for (int n = 0; n < num_xmm_regs; n++) {
! movdqu(Address(rsp, n*16), as_XMMRegister(n));
! }
! #endif
! }
!
! // Preserve registers across runtime call
! int incoming_argument_and_return_value_offset = -1; // only given a real offset when num_fpu_regs_in_use > 1
! if (num_fpu_regs_in_use > 1) {
! // Must preserve all other FPU regs (could alternatively convert
! // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
! // FPU state, but can not trust C compiler)
! NEEDS_CLEANUP;
! // NOTE that in this case we also push the incoming argument(s) to
! // the stack and restore it later; we also use this stack slot to
! // hold the return value from dsin, dcos etc.
! for (int i = 0; i < num_fpu_regs_in_use; i++) {
! subptr(rsp, sizeof(jdouble)); // one new 8-byte slot per FPU value
! fstp_d(Address(rsp, 0)); // store the FPU top-of-stack there and pop it
! }
! incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); // rsp-relative offset of the first value spilled above (the former FPU top-of-stack)
! for (int i = nb_args-1; i >= 0; i--) {
! fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); // push the argument(s) back onto the FPU stack
! }
! }
! // Move the argument(s) from the FPU stack into nb_args freshly reserved stack slots.
! subptr(rsp, nb_args*sizeof(jdouble));
! for (int i = 0; i < nb_args; i++) {
! fstp_d(Address(rsp, i*sizeof(jdouble)));
! }
!
! #ifdef _LP64
! if (nb_args > 0) { // 64-bit only: also load the argument(s) into xmm0/xmm1 (presumably where the native callee expects them -- confirm against the ABI)
! movdbl(xmm0, Address(rsp, 0));
! }
! if (nb_args > 1) {
! movdbl(xmm1, Address(rsp, sizeof(jdouble)));
! }
! assert(nb_args <= 2, "unsupported number of args");
! #endif // _LP64
!
! // NOTE: we must not use call_VM_leaf here because that requires a
! // complete interpreter frame in debug mode -- same bug as 4387334
! // MacroAssembler::call_VM_leaf_base is perfectly safe and will
! // do proper 64bit abi
!
! NEEDS_CLEANUP;
! // Need to add stack banging before this runtime call if it needs to
! // be taken; however, there is no generic stack banging routine at
! // the MacroAssembler level
!
MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
- // 64-bit only: copy the result from xmm0 through the stack onto the FPU stack.
- #ifdef _LP64
- movsd(Address(rsp, 0), xmm0);
- fld_d(Address(rsp, 0));
- #endif // _LP64
- addptr(rsp, sizeof(jdouble)*nb_args); // release the argument slots
- if (num_fpu_regs_in_use > 1) {
- // Must save return value to stack and then restore entire FPU
- // stack except incoming arguments
- fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
- for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { // reload the spilled FPU values that were not arguments
- fld_d(Address(rsp, 0));
- addptr(rsp, sizeof(jdouble));
- }
- fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); // reload the return value stored by the fstp_d above (same slot; rsp has advanced since)
- addptr(rsp, sizeof(jdouble)*nb_args); // release the remaining spill slots
- }
- // Restore XMM state, undoing the saves above in reverse order.
- if (UseSSE == 1) {
- for (int n = 0; n < 8; n++) {
- movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
- }
- addptr(rsp, sizeof(jdouble)*8);
- } else if (UseSSE >= 2) {
- // Restore whole 128bit (16 bytes) XMM registers
- #ifdef _LP64
- if (VM_Version::supports_evex()) {
- for (int n = 0; n < num_xmm_regs; n++) {
- vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
- }
- } else {
- for (int n = 0; n < num_xmm_regs; n++) {
- movdqu(as_XMMRegister(n), Address(rsp, n*16));
- }
- }
- #else
- for (int n = 0; n < num_xmm_regs; n++) {
- movdqu(as_XMMRegister(n), Address(rsp, n*16));
- }
- #endif
- addptr(rsp, 16*num_xmm_regs); // release the 128-bit save area
-
- #ifdef COMPILER2
- if (MaxVectorSize > 16) {
- // Restore upper half of YMM registers.
- for (int n = 0; n < num_xmm_regs; n++) {
- vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
- }
- addptr(rsp, 16*num_xmm_regs); // release the YMM upper-half save area
- if(UseAVX > 2) { // restore upper half of ZMM registers
- for (int n = 0; n < num_xmm_regs; n++) {
- vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
- }
- addptr(rsp, 32*num_xmm_regs); // release the ZMM upper-half save area
- }
- }
- #endif
- }
- popa(); // matches pusha() at entry
- }
-
- static const double pi_4 = 0.7853981633974483; // pi/4; bound compared against |x| in trigfunc's inline tan path
-
- void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { // trig selects the function: 's' = sin, 'c' = cos, 't' = tan; the result ends up on top of the FPU stack
- // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
- // was attempted in this code; unfortunately it appears that the
- // switch to 80-bit precision and back causes this to be
- // unprofitable compared with simply performing a runtime call if
- // the argument is out of the (-pi/4, pi/4) range.
-
- Register tmp = noreg;
- if (!VM_Version::supports_cmov()) {
- // fcmp needs a temporary so preserve rbx,
- tmp = rbx;
- push(tmp);
- }
- // Only 't' has an inline fast path (ftan, emitted when pi_4 is reachable); every other case falls straight through to the runtime call.
- Label slow_case, done;
- if (trig == 't') {
- ExternalAddress pi4_adr = (address)&pi_4;
- if (reachable(pi4_adr)) {
- // x ?<= pi/4
- fld_d(pi4_adr);
- fld_s(1); // Stack: X PI/4 X
- fabs(); // Stack: |X| PI/4 X
- fcmp(tmp); // tmp stays noreg when cmov is supported
- jcc(Assembler::above, slow_case); // |x| > pi/4: take the runtime call
-
- // fastest case: -pi/4 <= x <= pi/4
- ftan();
-
- jmp(done);
- }
- }
- // slow case: runtime call
- bind(slow_case);
-
- switch(trig) {
- case 's':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
- }
- break;
- case 'c':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
- }
- break;
- case 't':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
- }
- break;
- default:
- assert(false, "bad intrinsic"); // trig was none of 's', 'c', 't'
- break;
- }
-
- // Come here with result in F-TOS
- bind(done);
-
- if (tmp != noreg) { // rbx was pushed at the top of this function
- pop(tmp);
- }
}
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
--- 5619,5630 ----
}
adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}
! void MacroAssembler::mathfunc(address runtime_entry) { // Emits only the call to runtime_entry; this function itself saves and restores no registers
MacroAssembler::call_VM_leaf_base(runtime_entry, 0); // NOTE(review): second argument is presumably the argument count -- confirm against call_VM_leaf_base
}
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
< prev index next >