5612 get_thread(thread);
5613 #endif
5614 }
5615
5616 #ifdef _LP64
5617 if (var_size_in_bytes->is_valid()) {
5618 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
5619 } else {
5620 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
5621 }
5622 #else
5623 if (var_size_in_bytes->is_valid()) {
5624 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
5625 } else {
5626 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
5627 }
5628 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
5629 #endif
5630 }
5631
// Fallback path for floating-point intrinsics (SharedRuntime::dsin/dcos/dtan,
// etc.): saves the complete register state, marshals up to two jdouble
// arguments taken from the x87 FPU stack, calls the given C runtime entry,
// and restores everything so that the result is left on the x87 top-of-stack.
//
//   runtime_entry       - address of the C runtime routine to call
//   nb_args             - number of jdouble arguments on the FPU stack
//                         (asserted <= 2 on LP64)
//   num_fpu_regs_in_use - number of live x87 stack slots that must survive
//                         the call (includes the incoming argument(s))
5632 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
// Save all general-purpose registers first.
5633 pusha();
5634
5635 // if we are coming from c1, xmm registers may be live
5636 int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
5637 if (UseAVX > 2) {
// AVX-512 exposes xmm16-31 on 64-bit; 32-bit still has only 8 XMM registers.
5638 num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
5639 }
5640
5641 if (UseSSE == 1) {
// SSE1: only single-precision state is live; spill the low 32 bits of
// xmm0-7, one jdouble-sized stack slot per register.
5642 subptr(rsp, sizeof(jdouble)*8);
5643 for (int n = 0; n < 8; n++) {
5644 movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
5645 }
5646 } else if (UseSSE >= 2) {
5647 if (UseAVX > 2) {
// Reset opmask k1 to all-ones (0xffff) before emitting EVEX-encoded
// saves below; rbx is used as scratch and restored immediately.
5648 push(rbx);
5649 movl(rbx, 0xffff);
5650 kmovwl(k1, rbx);
5651 pop(rbx);
5652 }
5653 #ifdef COMPILER2
5654 if (MaxVectorSize > 16) {
5655 if(UseAVX > 2) {
5656 // Save upper half of ZMM registers
5657 subptr(rsp, 32*num_xmm_regs);
5658 for (int n = 0; n < num_xmm_regs; n++) {
5659 vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
5660 }
5661 }
5662 assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
5663 // Save upper half of YMM registers
5664 subptr(rsp, 16*num_xmm_regs);
5665 for (int n = 0; n < num_xmm_regs; n++) {
5666 vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
5667 }
5668 }
5669 #endif
5670 // Save whole 128bit (16 bytes) XMM registers
5671 subptr(rsp, 16*num_xmm_regs);
5672 #ifdef _LP64
5673 if (VM_Version::supports_evex()) {
// On EVEX-capable hardware use vextractf32x4 so xmm16-31 are reachable.
5674 for (int n = 0; n < num_xmm_regs; n++) {
5675 vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
5676 }
5677 } else {
5678 for (int n = 0; n < num_xmm_regs; n++) {
5679 movdqu(Address(rsp, n*16), as_XMMRegister(n));
5680 }
5681 }
5682 #else
5683 for (int n = 0; n < num_xmm_regs; n++) {
5684 movdqu(Address(rsp, n*16), as_XMMRegister(n));
5685 }
5686 #endif
5687 }
5688
5689 // Preserve registers across runtime call
5690 int incoming_argument_and_return_value_offset = -1;
5691 if (num_fpu_regs_in_use > 1) {
5692 // Must preserve all other FPU regs (could alternatively convert
5693 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
5694 // FPU state, but can not trust C compiler)
5695 NEEDS_CLEANUP;
5696 // NOTE that in this case we also push the incoming argument(s) to
5697 // the stack and restore it later; we also use this stack slot to
5698 // hold the return value from dsin, dcos etc.
// Pop every live x87 register to memory (fstp_d empties the FPU stack
// top-down), growing the stack one jdouble at a time.
5699 for (int i = 0; i < num_fpu_regs_in_use; i++) {
5700 subptr(rsp, sizeof(jdouble));
5701 fstp_d(Address(rsp, 0));
5702 }
// The slot of the last value spilled (the original FPU top-of-stack,
// i.e. the incoming argument) doubles as the return-value slot later.
5703 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
// Reload the incoming argument(s) so they are back on the FPU stack
// for the argument-marshalling store loop below.
5704 for (int i = nb_args-1; i >= 0; i--) {
5705 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
5706 }
5707 }
5708
// Marshal the nb_args FPU-stack arguments into a fresh stack area where the
// callee (32-bit calling convention) or the xmm-load code below (64-bit)
// picks them up.
5709 subptr(rsp, nb_args*sizeof(jdouble));
5710 for (int i = 0; i < nb_args; i++) {
5711 fstp_d(Address(rsp, i*sizeof(jdouble)));
5712 }
5713
5714 #ifdef _LP64
// 64-bit ABI passes floating-point arguments in xmm0/xmm1.
5715 if (nb_args > 0) {
5716 movdbl(xmm0, Address(rsp, 0));
5717 }
5718 if (nb_args > 1) {
5719 movdbl(xmm1, Address(rsp, sizeof(jdouble)));
5720 }
5721 assert(nb_args <= 2, "unsupported number of args");
5722 #endif // _LP64
5723
5724 // NOTE: we must not use call_VM_leaf here because that requires a
5725 // complete interpreter frame in debug mode -- same bug as 4387334
5726 // MacroAssembler::call_VM_leaf_base is perfectly safe and will
5727 // do proper 64bit abi
5728
5729 NEEDS_CLEANUP;
5730 // Need to add stack banging before this runtime call if it needs to
5731 // be taken; however, there is no generic stack banging routine at
5732 // the MacroAssembler level
5733
5734 MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
5735
5736 #ifdef _LP64
// 64-bit: result comes back in xmm0; bounce it through memory onto the
// x87 stack (callers expect the result in F-TOS).
5737 movsd(Address(rsp, 0), xmm0);
5738 fld_d(Address(rsp, 0));
5739 #endif // _LP64
// Release the argument-marshalling area.
5740 addptr(rsp, sizeof(jdouble)*nb_args);
5741 if (num_fpu_regs_in_use > 1) {
5742 // Must save return value to stack and then restore entire FPU
5743 // stack except incoming arguments
5744 fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
// Reload the preserved (non-argument) x87 registers in reverse spill
// order, popping one jdouble slot at a time.
5745 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
5746 fld_d(Address(rsp, 0));
5747 addptr(rsp, sizeof(jdouble));
5748 }
// Finally reload the return value (stored in the old argument slot)
// and pop the remaining argument slots.
5749 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
5750 addptr(rsp, sizeof(jdouble)*nb_args);
5751 }
5752
// Mirror of the save sequence at the top: restore XMM/YMM/ZMM state.
5753 if (UseSSE == 1) {
5754 for (int n = 0; n < 8; n++) {
5755 movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
5756 }
5757 addptr(rsp, sizeof(jdouble)*8);
5758 } else if (UseSSE >= 2) {
5759 // Restore whole 128bit (16 bytes) XMM registers
5760 #ifdef _LP64
5761 if (VM_Version::supports_evex()) {
5762 for (int n = 0; n < num_xmm_regs; n++) {
5763 vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
5764 }
5765 } else {
5766 for (int n = 0; n < num_xmm_regs; n++) {
5767 movdqu(as_XMMRegister(n), Address(rsp, n*16));
5768 }
5769 }
5770 #else
5771 for (int n = 0; n < num_xmm_regs; n++) {
5772 movdqu(as_XMMRegister(n), Address(rsp, n*16));
5773 }
5774 #endif
5775 addptr(rsp, 16*num_xmm_regs);
5776
5777 #ifdef COMPILER2
5778 if (MaxVectorSize > 16) {
5779 // Restore upper half of YMM registers.
5780 for (int n = 0; n < num_xmm_regs; n++) {
5781 vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
5782 }
5783 addptr(rsp, 16*num_xmm_regs);
5784 if(UseAVX > 2) {
// Restore upper half of ZMM registers.
5785 for (int n = 0; n < num_xmm_regs; n++) {
5786 vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
5787 }
5788 addptr(rsp, 32*num_xmm_regs);
5789 }
5790 }
5791 #endif
5792 }
// Restore all general-purpose registers.
5793 popa();
5794 }
5795
// pi/4 — threshold for trigfunc's fast-path range check (|x| <= pi/4).
5796 static const double pi_4 = 0.7853981633974483;
5797
// Emits code for a trig intrinsic on the value at the x87 top-of-stack.
//
//   trig                - selects the function: 's' = sin, 'c' = cos, 't' = tan
//   num_fpu_regs_in_use - live x87 stack slots, forwarded to the runtime
//                         fallback so it can preserve them
//
// For tan there is a fast path that uses FPTAN directly when |x| <= pi/4;
// otherwise (and always for sin/cos here) the corresponding SharedRuntime
// C routine is called via fp_runtime_fallback. The result is left in F-TOS.
5798 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
5799 // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
5800 // was attempted in this code; unfortunately it appears that the
5801 // switch to 80-bit precision and back causes this to be
5802 // unprofitable compared with simply performing a runtime call if
5803 // the argument is out of the (-pi/4, pi/4) range.
5804
5805 Register tmp = noreg;
5806 if (!VM_Version::supports_cmov()) {
5807 // fcmp needs a temporary so preserve rbx,
5808 tmp = rbx;
5809 push(tmp);
5810 }
5811
5812 Label slow_case, done;
5813 if (trig == 't') {
// Fast path only for tan, and only if the pi_4 constant is addressable
// from the generated code.
5814 ExternalAddress pi4_adr = (address)&pi_4;
5815 if (reachable(pi4_adr)) {
5816 // x ?<= pi/4
5817 fld_d(pi4_adr);
5818 fld_s(1); // Stack: X PI/4 X
5819 fabs(); // Stack: |X| PI/4 X
5820 fcmp(tmp);
// |X| > pi/4 -> argument out of fast range, take the runtime call.
5821 jcc(Assembler::above, slow_case);
5822
5823 // fastest case: -pi/4 <= x <= pi/4
5824 ftan();
5825
5826 jmp(done);
5827 }
5828 }
5829 // slow case: runtime call
5830 bind(slow_case);
5831
// Dispatch to the matching SharedRuntime C implementation; each takes
// one jdouble argument from the FPU stack.
5832 switch(trig) {
5833 case 's':
5834 {
5835 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
5836 }
5837 break;
5838 case 'c':
5839 {
5840 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
5841 }
5842 break;
5843 case 't':
5844 {
5845 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
5846 }
5847 break;
5848 default:
5849 assert(false, "bad intrinsic");
5850 break;
5851 }
5852
5853 // Come here with result in F-TOS
5854 bind(done);
5855
// Undo the rbx preservation from the no-cmov path above.
5856 if (tmp != noreg) {
5857 pop(tmp);
5858 }
5859 }
5860
5861 // Look up the method for a megamorphic invokeinterface call.
5862 // The target method is determined by <intf_klass, itable_index>.
5863 // The receiver klass is in recv_klass.
5864 // On success, the result will be in method_result, and execution falls through.
5865 // On failure, execution transfers to the given label.
5866 void MacroAssembler::lookup_interface_method(Register recv_klass,
5867 Register intf_klass,
5868 RegisterOrConstant itable_index,
5869 Register method_result,
5870 Register scan_temp,
5871 Label& L_no_such_interface) {
5872 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
5873 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
5874 "caller must use same register for non-constant itable index as for method");
5875
5876 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
5877 int vtable_base = in_bytes(Klass::vtable_start_offset());
5878 int itentry_off = itableMethodEntry::method_offset_in_bytes();
|
5612 get_thread(thread);
5613 #endif
5614 }
5615
5616 #ifdef _LP64
5617 if (var_size_in_bytes->is_valid()) {
5618 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
5619 } else {
5620 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
5621 }
5622 #else
5623 if (var_size_in_bytes->is_valid()) {
5624 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
5625 } else {
5626 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
5627 }
5628 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
5629 #endif
5630 }
5631
// Thin wrapper: emits a call to the given math runtime routine with zero
// stack-passed arguments via call_VM_leaf_base. NOTE(review): unlike the
// older fp_runtime_fallback path, no register save/restore is emitted here —
// presumably callers are responsible for preserving any live state; verify
// against the call sites.
5632 void MacroAssembler::mathfunc(address runtime_entry) {
5633 MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
5634 }
5635
5636 // Look up the method for a megamorphic invokeinterface call.
5637 // The target method is determined by <intf_klass, itable_index>.
5638 // The receiver klass is in recv_klass.
5639 // On success, the result will be in method_result, and execution falls through.
5640 // On failure, execution transfers to the given label.
5641 void MacroAssembler::lookup_interface_method(Register recv_klass,
5642 Register intf_klass,
5643 RegisterOrConstant itable_index,
5644 Register method_result,
5645 Register scan_temp,
5646 Label& L_no_such_interface) {
5647 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
5648 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
5649 "caller must use same register for non-constant itable index as for method");
5650
5651 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
5652 int vtable_base = in_bytes(Klass::vtable_start_offset());
5653 int itentry_off = itableMethodEntry::method_offset_in_bytes();
|