src/cpu/x86/vm/macroAssembler_x86.cpp

    get_thread(thread);
#endif
  }

#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}
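
On 32-bit, the allocated_bytes counter is still 64 bits wide, so the update above is split into an addl on the low word and an adcl that folds the carry into the high word at offset +4. A minimal C++ sketch of that carry propagation (type and function names are illustrative, not HotSpot code):

#include <cstdint>

struct Counter64 { uint32_t lo, hi; };   // little-endian halves, hi at +4

// Equivalent of the addl/adcl pair above.
void add_allocated(Counter64& c, uint32_t bytes) {
  uint32_t old_lo = c.lo;
  c.lo += bytes;              // addl: may wrap around and set the carry flag
  c.hi += (c.lo < old_lo);    // adcl ..., 0: fold that carry into the high word
}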

void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();

  // If we are coming from C1, XMM registers may be live.
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }

  if (UseSSE == 1) {
    subptr(rsp, sizeof(jdouble)*8);
    for (int n = 0; n < 8; n++) {
      movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
    }
  } else if (UseSSE >= 2) {
    if (UseAVX > 2) {
      push(rbx);
      movl(rbx, 0xffff);
      kmovwl(k1, rbx);
      pop(rbx);
    }
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      if (UseAVX > 2) {
        // Save upper half of ZMM registers
        subptr(rsp, 32*num_xmm_regs);
        for (int n = 0; n < num_xmm_regs; n++) {
          vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
        }
      }
      assert(UseAVX > 0, "256-bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16*num_xmm_regs);
      for (int n = 0; n < num_xmm_regs; n++) {
        vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#endif
    // Save whole 128-bit (16 bytes) XMM registers
    subptr(rsp, 16*num_xmm_regs);
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        movdqu(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      movdqu(Address(rsp, n*16), as_XMMRegister(n));
    }
#endif
  }
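
Everything saved so far is reloaded in reverse order near the end of the function, so the scratch area below rsp has up to three slices. A small sketch of the total size, mirroring the subptr amounts above (an illustrative helper, not HotSpot code):

// Bytes reserved below rsp for vector state, in push order:
// ZMM upper halves (32 each), YMM upper halves (16 each), XMM lows (16 each).
int xmm_save_area_bytes(int num_xmm_regs, bool wide_vectors, bool evex) {
  int bytes = 16 * num_xmm_regs;   // 128-bit lows, always saved for UseSSE >= 2
  if (wide_vectors) {              // COMPILER2 && MaxVectorSize > 16
    bytes += 16 * num_xmm_regs;    // upper halves of YMM registers
    if (evex) {                    // UseAVX > 2
      bytes += 32 * num_xmm_regs;  // upper halves of ZMM registers
    }
  }
  return bytes;
}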

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (we could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to
    // trash FPU state, but we cannot trust the C compiler).
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore them later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }
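
The offset arithmetic here is easy to get wrong: ST0 is popped first, so after all num_fpu_regs_in_use pushes its copy sits farthest above the final rsp, which is exactly the incoming_argument_and_return_value_offset computed above. A model that can be checked in isolation (illustrative, not HotSpot code):

#include <cassert>

// Offset (from rsp, after k pushes of 8 bytes each) of the slot holding ST(i).
// ST0 is stored first and therefore ends up at the highest offset.
int slot_offset_of_st(int i, int k) {
  return 8 * (k - 1 - i);
}

int main() {
  const int k = 4;                                 // num_fpu_regs_in_use
  assert(slot_offset_of_st(0, k) == 8 * (k - 1));  // the ST0 / return-value slot
  assert(slot_offset_of_st(k - 1, k) == 0);        // last push lands at [rsp+0]
  return 0;
}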

  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64
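
On 64-bit the runtime entry is a plain C function, so the arguments must leave the x87 world and enter xmm0/xmm1, where the ABI expects the first two floating-point parameters; the result likewise comes back in xmm0 and is pushed onto the x87 stack through memory after the call (the movsd/fld_d pair below). A hedged illustration of the convention being matched (the wrapper is illustrative only):

// A compiler emits exactly the moves hand-coded in the assembly above:
// argument into xmm0, call, result read back from xmm0.
double call_unary_math(double (*runtime_entry)(double), double arg) {
  return runtime_entry(arg);   // arg in xmm0, result in xmm0 per the x86-64 ABI
}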

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334.
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // follow the proper 64-bit ABI.

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level.

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble)*nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save the return value to the stack and then restore the entire
    // FPU stack except for the incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*nb_args);
  }

  if (UseSSE == 1) {
    for (int n = 0; n < 8; n++) {
      movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
    }
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128-bit (16 bytes) XMM registers
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        movdqu(as_XMMRegister(n), Address(rsp, n*16));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      movdqu(as_XMMRegister(n), Address(rsp, n*16));
    }
#endif
    addptr(rsp, 16*num_xmm_regs);

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      for (int n = 0; n < num_xmm_regs; n++) {
        vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
      }
      addptr(rsp, 16*num_xmm_regs);
      if (UseAVX > 2) {
        // Restore upper half of ZMM registers.
        for (int n = 0; n < num_xmm_regs; n++) {
          vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
        }
        addptr(rsp, 32*num_xmm_regs);
      }
    }
#endif
  }
  popa();
}

static const double     pi_4 =  0.7853981633974483;

void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary, so preserve rbx.
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;
  if (trig == 't') {
    ExternalAddress pi4_adr = (address)&pi_4;
    if (reachable(pi4_adr)) {
      // x ?<= pi/4
      fld_d(pi4_adr);
      fld_s(1);                // Stack:  X  PI/4  X
      fabs();                  // Stack: |X| PI/4  X
      fcmp(tmp);
      jcc(Assembler::above, slow_case);

      // fastest case: -pi/4 <= x <= pi/4
      ftan();

      jmp(done);
    }
  }
  // slow case: runtime call
  bind(slow_case);

  switch (trig) {
  case 's':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
    }
    break;
  case 'c':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
    }
    break;
  case 't':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }

  // Come here with the result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}
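
Collapsed into scalar C++, the control flow of trigfunc for 't' looks like this (a minimal sketch; hardware_ftan and runtime_dtan are illustrative stand-ins for the ftan instruction and SharedRuntime::dtan, not real APIs):

#include <cmath>

static const double pi_4 = 0.7853981633974483;

double hardware_ftan(double x) { return std::tan(x); }  // stand-in for ftan
double runtime_dtan(double x)  { return std::tan(x); }  // stand-in for SharedRuntime::dtan

double tan_intrinsic(double x) {
  if (std::fabs(x) <= pi_4) {   // the fcmp against pi_4 above
    return hardware_ftan(x);    // fastest case: -pi/4 <= x <= pi/4
  }
  return runtime_dtan(x);       // slow_case: fp_runtime_fallback(dtan, 1, ...)
}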

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = in_bytes(Klass::vtable_start_offset());
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
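
For reference, the data structure the generated code scans from this point on can be modeled like this (a conceptual sketch; the struct and helper names are illustrative, not the actual HotSpot declarations):

// Conceptual model only; real HotSpot types differ.
struct Klass;
struct Method;

// After the vtable comes a null-terminated array of itableOffsetEntry, each
// pairing an interface Klass with the offset of that interface's method table.
struct itableOffsetEntryModel { Klass* interface_klass; int offset; };

Method* lookup_model(Klass* recv_klass, Klass* intf_klass, int itable_index,
                     itableOffsetEntryModel* first_entry) {
  for (itableOffsetEntryModel* e = first_entry; e->interface_klass != NULL; e++) {
    if (e->interface_klass == intf_klass) {
      // Found the interface: its method table lives at recv_klass + offset.
      Method** methods = (Method**)((char*)recv_klass + e->offset);
      return methods[itable_index];
    }
  }
  return NULL;  // no such interface -> branch to L_no_such_interface
}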

In the updated version of the file, fp_runtime_fallback and trigfunc are removed; the x87 fast paths and the register save/restore sequence give way to a single helper that calls straight into the C runtime:

void MacroAssembler::mathfunc(address runtime_entry) {
  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
}
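
A hedged usage sketch (the call site is illustrative of how a trig intrinsic would now reach SharedRuntime::dsin through the new helper; it is not quoted from the changeset):

// Somewhere in the intrinsic expansion, instead of trigfunc('s', ...):
mathfunc(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));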