< prev index next >
src/cpu/x86/vm/macroAssembler_x86.cpp
Print this page
@@ -5619,237 +5619,23 @@
}
adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}
-void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
- pusha();
-
- // if we are coming from c1, xmm registers may be live
- int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
- if (UseAVX > 2) {
- num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
- }
-
- if (UseSSE == 1) {
- subptr(rsp, sizeof(jdouble)*8);
- for (int n = 0; n < 8; n++) {
- movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
- }
- } else if (UseSSE >= 2) {
- if (UseAVX > 2) {
- push(rbx);
- movl(rbx, 0xffff);
- kmovwl(k1, rbx);
- pop(rbx);
- }
-#ifdef COMPILER2
- if (MaxVectorSize > 16) {
- if(UseAVX > 2) {
- // Save upper half of ZMM registers
- subptr(rsp, 32*num_xmm_regs);
- for (int n = 0; n < num_xmm_regs; n++) {
- vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
- }
- }
- assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
- // Save upper half of YMM registers
- subptr(rsp, 16*num_xmm_regs);
- for (int n = 0; n < num_xmm_regs; n++) {
- vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
- }
- }
-#endif
- // Save whole 128bit (16 bytes) XMM registers
- subptr(rsp, 16*num_xmm_regs);
-#ifdef _LP64
- if (VM_Version::supports_evex()) {
- for (int n = 0; n < num_xmm_regs; n++) {
- vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
- }
- } else {
- for (int n = 0; n < num_xmm_regs; n++) {
- movdqu(Address(rsp, n*16), as_XMMRegister(n));
- }
- }
-#else
- for (int n = 0; n < num_xmm_regs; n++) {
- movdqu(Address(rsp, n*16), as_XMMRegister(n));
- }
-#endif
- }
-
- // Preserve registers across runtime call
- int incoming_argument_and_return_value_offset = -1;
- if (num_fpu_regs_in_use > 1) {
- // Must preserve all other FPU regs (could alternatively convert
- // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
- // FPU state, but can not trust C compiler)
- NEEDS_CLEANUP;
- // NOTE that in this case we also push the incoming argument(s) to
- // the stack and restore it later; we also use this stack slot to
- // hold the return value from dsin, dcos etc.
- for (int i = 0; i < num_fpu_regs_in_use; i++) {
- subptr(rsp, sizeof(jdouble));
- fstp_d(Address(rsp, 0));
- }
- incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
- for (int i = nb_args-1; i >= 0; i--) {
- fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
- }
- }
-
- subptr(rsp, nb_args*sizeof(jdouble));
- for (int i = 0; i < nb_args; i++) {
- fstp_d(Address(rsp, i*sizeof(jdouble)));
- }
-
-#ifdef _LP64
- if (nb_args > 0) {
- movdbl(xmm0, Address(rsp, 0));
- }
- if (nb_args > 1) {
- movdbl(xmm1, Address(rsp, sizeof(jdouble)));
- }
- assert(nb_args <= 2, "unsupported number of args");
-#endif // _LP64
-
- // NOTE: we must not use call_VM_leaf here because that requires a
- // complete interpreter frame in debug mode -- same bug as 4387334
- // MacroAssembler::call_VM_leaf_base is perfectly safe and will
- // do proper 64bit abi
-
- NEEDS_CLEANUP;
- // Need to add stack banging before this runtime call if it needs to
- // be taken; however, there is no generic stack banging routine at
- // the MacroAssembler level
-
- MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
-
-#ifdef _LP64
- movsd(Address(rsp, 0), xmm0);
- fld_d(Address(rsp, 0));
-#endif // _LP64
- addptr(rsp, sizeof(jdouble)*nb_args);
- if (num_fpu_regs_in_use > 1) {
- // Must save return value to stack and then restore entire FPU
- // stack except incoming arguments
- fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
- for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
- fld_d(Address(rsp, 0));
- addptr(rsp, sizeof(jdouble));
- }
- fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
- addptr(rsp, sizeof(jdouble)*nb_args);
- }
-
- if (UseSSE == 1) {
- for (int n = 0; n < 8; n++) {
- movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
- }
- addptr(rsp, sizeof(jdouble)*8);
- } else if (UseSSE >= 2) {
- // Restore whole 128bit (16 bytes) XMM registers
-#ifdef _LP64
- if (VM_Version::supports_evex()) {
- for (int n = 0; n < num_xmm_regs; n++) {
- vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
- }
- } else {
- for (int n = 0; n < num_xmm_regs; n++) {
- movdqu(as_XMMRegister(n), Address(rsp, n*16));
- }
- }
-#else
- for (int n = 0; n < num_xmm_regs; n++) {
- movdqu(as_XMMRegister(n), Address(rsp, n*16));
- }
-#endif
- addptr(rsp, 16*num_xmm_regs);
-
-#ifdef COMPILER2
- if (MaxVectorSize > 16) {
- // Restore upper half of YMM registers.
- for (int n = 0; n < num_xmm_regs; n++) {
- vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
- }
- addptr(rsp, 16*num_xmm_regs);
- if(UseAVX > 2) {
- for (int n = 0; n < num_xmm_regs; n++) {
- vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
- }
- addptr(rsp, 32*num_xmm_regs);
- }
- }
-#endif
- }
- popa();
-}
-
-static const double pi_4 = 0.7853981633974483;
-
-void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
- // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
- // was attempted in this code; unfortunately it appears that the
- // switch to 80-bit precision and back causes this to be
- // unprofitable compared with simply performing a runtime call if
- // the argument is out of the (-pi/4, pi/4) range.
-
- Register tmp = noreg;
- if (!VM_Version::supports_cmov()) {
- // fcmp needs a temporary so preserve rbx,
- tmp = rbx;
- push(tmp);
- }
-
- Label slow_case, done;
- if (trig == 't') {
- ExternalAddress pi4_adr = (address)&pi_4;
- if (reachable(pi4_adr)) {
- // x ?<= pi/4
- fld_d(pi4_adr);
- fld_s(1); // Stack: X PI/4 X
- fabs(); // Stack: |X| PI/4 X
- fcmp(tmp);
- jcc(Assembler::above, slow_case);
-
- // fastest case: -pi/4 <= x <= pi/4
- ftan();
-
- jmp(done);
- }
- }
- // slow case: runtime call
- bind(slow_case);
-
- switch(trig) {
- case 's':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
- }
- break;
- case 'c':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
- }
- break;
- case 't':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
- }
- break;
- default:
- assert(false, "bad intrinsic");
- break;
- }
-
- // Come here with result in F-TOS
- bind(done);
-
- if (tmp != noreg) {
- pop(tmp);
- }
+void MacroAssembler::mathfunc(address runtime_entry) { // Emits a call to runtime_entry bracketed by a 0x20-byte rsp adjustment, plus an extra 8-byte adjustment when rsp's low 4 bits are nonzero; saves/restores no registers itself.
+ Label E, L; // L: path taken when rsp passes the alignment test; E: common exit
+ subq(rsp, 0x20); // reserve 0x20 bytes of stack; presumably scratch space required by the callee's ABI -- TODO confirm. NOTE(review): subq/addq are 64-bit forms (subptr/addptr is the width-neutral spelling) -- confirm this is only emitted on LP64
+ testl(rsp, 15); // probe the low 4 bits of rsp, i.e. whether it is a multiple of 16
+ jcc(Assembler::zero, L); // already a multiple of 16: take the plain call at L
+ subq(rsp, 8); // otherwise pad by 8 bytes; NOTE(review): this yields 16-byte alignment only if rsp was 8 mod 16 -- confirm callers guarantee 8-byte alignment
+ call(RuntimeAddress(runtime_entry));
+ addq(rsp, 8); // drop the 8-byte padding again
+ jmp(E); // skip the unpadded call
+ bind(L);
+ call(RuntimeAddress(runtime_entry)); // same target, no padding needed
+ bind(E);
+ addq(rsp, 0x20); // release the 0x20 bytes reserved on entry, restoring the incoming rsp
}
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
< prev index next >