5604 get_thread(thread);
5605 #endif
5606 }
5607
5608 #ifdef _LP64
5609 if (var_size_in_bytes->is_valid()) {
5610 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
5611 } else {
5612 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
5613 }
5614 #else
5615 if (var_size_in_bytes->is_valid()) {
5616 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
5617 } else {
5618 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
5619 }
5620 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
5621 #endif
5622 }
5623
// Spill all live CPU/XMM registers, marshal up to two jdouble arguments from
// the x87 FPU stack into the calling-convention locations, call a leaf
// runtime routine (e.g. SharedRuntime::dsin/dcos/dtan), then restore all
// saved state so the routine's result ends up on the FPU stack (F-TOS).
//
//   runtime_entry       - address of the C runtime routine to invoke
//   nb_args             - number of jdouble arguments on the FPU stack
//                         (at most 2 on 64-bit; see assert below)
//   num_fpu_regs_in_use - number of live x87 stack slots that must be
//                         preserved across the call
void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();

  // if we are coming from c1, xmm registers may be live
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }

  if (UseSSE == 1) {
    // SSE1: only the low 32 bits (a float) of each XMM register can be
    // moved; spill those, one per jdouble-sized slot.
    subptr(rsp, sizeof(jdouble)*8);
    for (int n = 0; n < 8; n++) {
      movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
    }
  } else if (UseSSE >= 2) {
    if (UseAVX > 2) {
      // Reset k1 to all-ones (0xffff) so subsequent EVEX-encoded
      // instructions are unmasked; rbx is preserved around the load.
      push(rbx);
      movl(rbx, 0xffff);
      kmovwl(k1, rbx);
      pop(rbx);
    }
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      if(UseAVX > 2) {
        // Save upper half of ZMM registers
        subptr(rsp, 32*num_xmm_regs);
        for (int n = 0; n < num_xmm_regs; n++) {
          vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
        }
      }
      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16*num_xmm_regs);
      for (int n = 0; n < num_xmm_regs; n++) {
        vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    subptr(rsp, 16*num_xmm_regs);
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      // On EVEX-capable hardware use the EVEX-encoded extract so the
      // high XMM registers (16..31) are reachable.
      for (int n = 0; n < num_xmm_regs; n++) {
        vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        movdqu(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      movdqu(Address(rsp, n*16), as_XMMRegister(n));
    }
#endif
  }

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    // Offset of the last-spilled slot, i.e. where the incoming argument
    // (and later the return value) lives.
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    // Reload the arguments so they are back on the FPU stack for the
    // argument-marshalling store loop below.
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }

  // Pop the nb_args arguments off the FPU stack into memory at rsp.
  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  // 64-bit ABI passes floating-point args in xmm0/xmm1.
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  // Move the xmm0 result onto the FPU stack via memory.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble)*nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    // Finally reload the return value (stored above) to F-TOS and pop
    // the argument/return slots.
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*nb_args);
  }

  // Restore XMM/YMM/ZMM state in the exact reverse order of the save
  // sequence at the top of this function.
  if (UseSSE == 1) {
    for (int n = 0; n < 8; n++) {
      movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
    }
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128bit (16 bytes) XMM registers
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        movdqu(as_XMMRegister(n), Address(rsp, n*16));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      movdqu(as_XMMRegister(n), Address(rsp, n*16));
    }
#endif
    addptr(rsp, 16*num_xmm_regs);

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      for (int n = 0; n < num_xmm_regs; n++) {
        vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
      }
      addptr(rsp, 16*num_xmm_regs);
      if(UseAVX > 2) {
        // Restore upper half of ZMM registers.
        for (int n = 0; n < num_xmm_regs; n++) {
          vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
        }
        addptr(rsp, 32*num_xmm_regs);
      }
    }
#endif
  }
  popa();
}
5787
// pi/4: threshold used by trigfunc() — for |x| <= pi/4 the inline ftan
// fast path is accurate; larger arguments take the runtime-call slow path.
static const double pi_4 = 0.7853981633974483;
5789
5790 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
5791 // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
5792 // was attempted in this code; unfortunately it appears that the
5793 // switch to 80-bit precision and back causes this to be
5794 // unprofitable compared with simply performing a runtime call if
5795 // the argument is out of the (-pi/4, pi/4) range.
5796
5797 Register tmp = noreg;
5798 if (!VM_Version::supports_cmov()) {
5799 // fcmp needs a temporary so preserve rbx,
5800 tmp = rbx;
5801 push(tmp);
5802 }
5803
5804 Label slow_case, done;
5805 if (trig == 't') {
5806 ExternalAddress pi4_adr = (address)&pi_4;
5807 if (reachable(pi4_adr)) {
5808 // x ?<= pi/4
5809 fld_d(pi4_adr);
5810 fld_s(1); // Stack: X PI/4 X
5811 fabs(); // Stack: |X| PI/4 X
5812 fcmp(tmp);
5813 jcc(Assembler::above, slow_case);
5814
5815 // fastest case: -pi/4 <= x <= pi/4
5816 ftan();
5817
5818 jmp(done);
5819 }
5820 }
5821 // slow case: runtime call
5822 bind(slow_case);
5823
5824 switch(trig) {
5825 case 's':
5826 {
5827 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
5828 }
5829 break;
5830 case 'c':
5831 {
5832 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
5833 }
5834 break;
5835 case 't':
5836 {
5837 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
5838 }
5839 break;
5840 default:
5841 assert(false, "bad intrinsic");
5842 break;
5843 }
5844
5845 // Come here with result in F-TOS
5846 bind(done);
5847
5848 if (tmp != noreg) {
5849 pop(tmp);
5850 }
5851 }
5852
5853 // Look up the method for a megamorphic invokeinterface call.
5854 // The target method is determined by <intf_klass, itable_index>.
5855 // The receiver klass is in recv_klass.
5856 // On success, the result will be in method_result, and execution falls through.
5857 // On failure, execution transfers to the given label.
5858 void MacroAssembler::lookup_interface_method(Register recv_klass,
5859 Register intf_klass,
5860 RegisterOrConstant itable_index,
5861 Register method_result,
5862 Register scan_temp,
5863 Label& L_no_such_interface) {
5864 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
5865 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
5866 "caller must use same register for non-constant itable index as for method");
5867
5868 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
5869 int vtable_base = in_bytes(Klass::vtable_start_offset());
5870 int itentry_off = itableMethodEntry::method_offset_in_bytes();
|
5604 get_thread(thread);
5605 #endif
5606 }
5607
5608 #ifdef _LP64
5609 if (var_size_in_bytes->is_valid()) {
5610 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
5611 } else {
5612 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
5613 }
5614 #else
5615 if (var_size_in_bytes->is_valid()) {
5616 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
5617 } else {
5618 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
5619 }
5620 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
5621 #endif
5622 }
5623
// Emit a leaf call to a math runtime routine. No arguments are marshalled
// here (num_args is 0); the caller must already have placed the arguments
// per the ABI and is responsible for consuming the result.
// call_VM_leaf_base is used directly because call_VM_leaf would require a
// complete interpreter frame in debug mode (same issue as 4387334).
void MacroAssembler::mathfunc(address runtime_entry) {
  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
}
5627
5628 // Look up the method for a megamorphic invokeinterface call.
5629 // The target method is determined by <intf_klass, itable_index>.
5630 // The receiver klass is in recv_klass.
5631 // On success, the result will be in method_result, and execution falls through.
5632 // On failure, execution transfers to the given label.
5633 void MacroAssembler::lookup_interface_method(Register recv_klass,
5634 Register intf_klass,
5635 RegisterOrConstant itable_index,
5636 Register method_result,
5637 Register scan_temp,
5638 Label& L_no_such_interface) {
5639 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
5640 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
5641 "caller must use same register for non-constant itable index as for method");
5642
5643 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
5644 int vtable_base = in_bytes(Klass::vtable_start_offset());
5645 int itentry_off = itableMethodEntry::method_offset_in_bytes();
|