303
304 // tell the simulator we have returned to the stub
305
306 // we do this here because the notify will already have been done
307 // if we get to the next instruction via an exception
308 //
309 // n.b. adding this instruction here affects the calculation of
310 // whether or not a routine returns to the call stub (used when
311 // doing stack walks) since the normal test is to check the return
312 // pc against the address saved below. so we may need to allow for
313 // this extra instruction in the check.
314
315 if (NotifySimulator) {
316 __ notify(Assembler::method_reentry);
317 }
318 // save current address for use by exception handling code
319
320 return_address = __ pc();
321
322 // store result depending on type (everything that is not
323 // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
324 // n.b. this assumes Java returns an integral result in r0
325 // and a floating result in j_farg0
326 __ ldr(j_rarg2, result);
327 Label is_long, is_float, is_double, exit;
328 __ ldr(j_rarg1, result_type);
329 __ cmp(j_rarg1, (u1)T_OBJECT);
330 __ br(Assembler::EQ, is_long);
331 __ cmp(j_rarg1, (u1)T_LONG);
332 __ br(Assembler::EQ, is_long);
333 __ cmp(j_rarg1, (u1)T_FLOAT);
334 __ br(Assembler::EQ, is_float);
335 __ cmp(j_rarg1, (u1)T_DOUBLE);
336 __ br(Assembler::EQ, is_double);
337
338 // handle T_INT case
339 __ strw(r0, Address(j_rarg2));
340
341 __ BIND(exit);
342
343 // pop parameters
344 __ sub(esp, rfp, -sp_after_call_off * wordSize);
345
346 #ifdef ASSERT
347 // verify that threads correspond
348 {
349 Label L, S;
350 __ ldr(rscratch1, thread);
1812 bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs);
1813
1814 // save the original count
1815 __ mov(count_save, count);
1816
1817 // Copy from low to high addresses
1818 __ mov(start_to, to); // Save destination array start address
1819 __ b(L_load_element);
1820
1821 // ======== begin loop ========
1822 // (Loop is rotated; its entry is L_load_element.)
1823 // Loop control:
1824 // for (; count != 0; count--) {
1825 // copied_oop = load_heap_oop(from++);
1826 // ... generate_type_check ...;
1827 // store_heap_oop(to++, copied_oop);
1828 // }
1829 __ align(OptoLoopAlignment);
1830
1831 __ BIND(L_store_element);
1832 __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop, noreg, noreg, AS_RAW); // store the oop
1833 __ sub(count, count, 1);
1834 __ cbz(count, L_do_card_marks);
1835
1836 // ======== loop entry is here ========
1837 __ BIND(L_load_element);
1838 __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8), noreg, noreg, AS_RAW); // load the oop
1839 __ cbz(copied_oop, L_store_element);
1840
1841 __ load_klass(r19_klass, copied_oop);// query the object klass
1842 generate_type_check(r19_klass, ckoff, ckval, L_store_element);
1843 // ======== end loop ========
1844
1845 // It was a real error; we must depend on the caller to finish the job.
1846 // Register count = remaining oops, count_orig = total oops.
1847 // Emit GC store barriers for the oops we have copied and report
1848 // their number to the caller.
1849
1850 __ subs(count, count_save, count); // K = partially copied oop count
1851 __ eon(count, count, zr); // report (-1^K) to caller
1852 __ br(Assembler::EQ, L_done_pop);
5633 // MACC(Ra, Ra, t0, t1, t2);
5634 // }
5635 // iters = (2*len-i)/2;
5636 // assert(iters == len-j, "must be");
5637 // for (; iters--; j++) {
5638 // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
5639 // MACC(Rm, Rn, t0, t1, t2);
5640 // Rm = *++Pm;
5641 // Rn = *--Pn;
5642 // }
5643 // Pm_base[i-len] = t0;
5644 // t0 = t1; t1 = t2; t2 = 0;
5645 // }
5646
5647 // while (t0)
5648 // t0 = sub(Pm_base, Pn_base, t0, len);
5649 // }
5650 };
5651
5652
5653 // Initialization
5654 void generate_initial() {
5655 // Generate initial stubs and initializes the entry points
5656
5657 // entry points that exist in all platforms Note: This is code
5658 // that could be shared among different platforms - however the
5659 // benefit seems to be smaller than the disadvantage of having a
5660 // much more complicated generator structure. See also comment in
5661 // stubRoutines.hpp.
5662
5663 StubRoutines::_forward_exception_entry = generate_forward_exception();
5664
5665 StubRoutines::_call_stub_entry =
5666 generate_call_stub(StubRoutines::_call_stub_return_address);
5667
5668 // is referenced by megamorphic call
5669 StubRoutines::_catch_exception_entry = generate_catch_exception();
5670
5671 // Build this early so it's available for the interpreter.
5672 StubRoutines::_throw_StackOverflowError_entry =
5682 StubRoutines::_crc_table_adr = (address)StubRoutines::aarch64::_crc_table;
5683 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5684 }
5685
5686 if (UseCRC32CIntrinsics) {
5687 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
5688 }
5689
5690 // Disabled until JDK-8210858 is fixed
5691 // if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
5692 // StubRoutines::_dlog = generate_dlog();
5693 // }
5694
5695 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
5696 StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false);
5697 }
5698
5699 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
5700 StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true);
5701 }
5702 }
5703
5704 void generate_all() {
5705 // support for verify_oop (must happen after universe_init)
5706 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
5707 StubRoutines::_throw_AbstractMethodError_entry =
5708 generate_throw_exception("AbstractMethodError throw_exception",
5709 CAST_FROM_FN_PTR(address,
5710 SharedRuntime::
5711 throw_AbstractMethodError));
5712
5713 StubRoutines::_throw_IncompatibleClassChangeError_entry =
5714 generate_throw_exception("IncompatibleClassChangeError throw_exception",
5715 CAST_FROM_FN_PTR(address,
5716 SharedRuntime::
5717 throw_IncompatibleClassChangeError));
5718
5719 StubRoutines::_throw_NullPointerException_at_call_entry =
5720 generate_throw_exception("NullPointerException at call throw_exception",
5721 CAST_FROM_FN_PTR(address,
|
303
304 // tell the simulator we have returned to the stub
305
306 // we do this here because the notify will already have been done
307 // if we get to the next instruction via an exception
308 //
309 // n.b. adding this instruction here affects the calculation of
310 // whether or not a routine returns to the call stub (used when
311 // doing stack walks) since the normal test is to check the return
312 // pc against the address saved below. so we may need to allow for
313 // this extra instruction in the check.
314
315 if (NotifySimulator) {
316 __ notify(Assembler::method_reentry);
317 }
318 // save current address for use by exception handling code
319
320 return_address = __ pc();
321
322 // store result depending on type (everything that is not
323 // T_OBJECT, T_VALUETYPE, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
324 // n.b. this assumes Java returns an integral result in r0
325 // and a floating result in j_farg0
326 __ ldr(j_rarg2, result);
327 Label is_long, is_float, is_double, exit;
328 __ ldr(j_rarg1, result_type);
329 __ cmp(j_rarg1, (u1)T_OBJECT);
330 __ br(Assembler::EQ, is_long);
331 __ cmp(j_rarg1, (u1)T_VALUETYPE);
332 __ br(Assembler::EQ, is_long);
333 __ cmp(j_rarg1, (u1)T_LONG);
334 __ br(Assembler::EQ, is_long);
335 __ cmp(j_rarg1, (u1)T_FLOAT);
336 __ br(Assembler::EQ, is_float);
337 __ cmp(j_rarg1, (u1)T_DOUBLE);
338 __ br(Assembler::EQ, is_double);
339
340 // handle T_INT case
341 __ strw(r0, Address(j_rarg2));
342
343 __ BIND(exit);
344
345 // pop parameters
346 __ sub(esp, rfp, -sp_after_call_off * wordSize);
347
348 #ifdef ASSERT
349 // verify that threads correspond
350 {
351 Label L, S;
352 __ ldr(rscratch1, thread);
1814 bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs);
1815
1816 // save the original count
1817 __ mov(count_save, count);
1818
1819 // Copy from low to high addresses
1820 __ mov(start_to, to); // Save destination array start address
1821 __ b(L_load_element);
1822
1823 // ======== begin loop ========
1824 // (Loop is rotated; its entry is L_load_element.)
1825 // Loop control:
1826 // for (; count != 0; count--) {
1827 // copied_oop = load_heap_oop(from++);
1828 // ... generate_type_check ...;
1829 // store_heap_oop(to++, copied_oop);
1830 // }
1831 __ align(OptoLoopAlignment);
1832
1833 __ BIND(L_store_element);
1834 __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop
1835 __ sub(count, count, 1);
1836 __ cbz(count, L_do_card_marks);
1837
1838 // ======== loop entry is here ========
1839 __ BIND(L_load_element);
1840 __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8), noreg, noreg, AS_RAW); // load the oop
1841 __ cbz(copied_oop, L_store_element);
1842
1843 __ load_klass(r19_klass, copied_oop);// query the object klass
1844 generate_type_check(r19_klass, ckoff, ckval, L_store_element);
1845 // ======== end loop ========
1846
1847 // It was a real error; we must depend on the caller to finish the job.
1848 // Register count = remaining oops, count_orig = total oops.
1849 // Emit GC store barriers for the oops we have copied and report
1850 // their number to the caller.
1851
1852 __ subs(count, count_save, count); // K = partially copied oop count
1853 __ eon(count, count, zr); // report (-1^K) to caller
1854 __ br(Assembler::EQ, L_done_pop);
5635 // MACC(Ra, Ra, t0, t1, t2);
5636 // }
5637 // iters = (2*len-i)/2;
5638 // assert(iters == len-j, "must be");
5639 // for (; iters--; j++) {
5640 // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
5641 // MACC(Rm, Rn, t0, t1, t2);
5642 // Rm = *++Pm;
5643 // Rn = *--Pn;
5644 // }
5645 // Pm_base[i-len] = t0;
5646 // t0 = t1; t1 = t2; t2 = 0;
5647 // }
5648
5649 // while (t0)
5650 // t0 = sub(Pm_base, Pn_base, t0, len);
5651 // }
5652 };
5653
5654
5655 // Call here from the interpreter or compiled code to either load
5656 // multiple returned values from the value type instance being
5657 // returned to registers or to store returned values to a newly
5658 // allocated value type instance.
5659 address generate_return_value_stub(address destination, const char* name, bool has_res) {
5660
5661 // Information about frame layout at time of blocking runtime call.
5662 // Note that we only have to preserve callee-saved registers since
5663 // the compilers are responsible for supplying a continuation point
5664 // if they expect all registers to be preserved.
5665 // n.b. aarch64 asserts that frame::arg_reg_save_area_bytes == 0
5666 enum layout {
5667 rfp_off = 0, rfp_off2,
5668
5669 j_rarg7_off, j_rarg7_2,
5670 j_rarg6_off, j_rarg6_2,
5671 j_rarg5_off, j_rarg5_2,
5672 j_rarg4_off, j_rarg4_2,
5673 j_rarg3_off, j_rarg3_2,
5674 j_rarg2_off, j_rarg2_2,
5675 j_rarg1_off, j_rarg1_2,
5676 j_rarg0_off, j_rarg0_2,
5677
5678 j_farg0_off, j_farg0_2,
5679 j_farg1_off, j_farg1_2,
5680 j_farg2_off, j_farg2_2,
5681 j_farg3_off, j_farg3_2,
5682 j_farg4_off, j_farg4_2,
5683 j_farg5_off, j_farg5_2,
5684 j_farg6_off, j_farg6_2,
5685 j_farg7_off, j_farg7_2,
5686
5687 return_off, return_off2,
5688 framesize // inclusive of return address
5689 };
5690
5691 int insts_size = 512;
5692 int locs_size = 64;
5693
5694 CodeBuffer code(name, insts_size, locs_size);
5695 OopMapSet* oop_maps = new OopMapSet();
5696 MacroAssembler* masm = new MacroAssembler(&code);
5697
5698 address start = __ pc();
5699
5700 const Address f7_save (rfp, j_farg7_off * wordSize);
5701 const Address f6_save (rfp, j_farg6_off * wordSize);
5702 const Address f5_save (rfp, j_farg5_off * wordSize);
5703 const Address f4_save (rfp, j_farg4_off * wordSize);
5704 const Address f3_save (rfp, j_farg3_off * wordSize);
5705 const Address f2_save (rfp, j_farg2_off * wordSize);
5706 const Address f1_save (rfp, j_farg1_off * wordSize);
5707 const Address f0_save (rfp, j_farg0_off * wordSize);
5708
5709 const Address r0_save (rfp, j_rarg0_off * wordSize);
5710 const Address r1_save (rfp, j_rarg1_off * wordSize);
5711 const Address r2_save (rfp, j_rarg2_off * wordSize);
5712 const Address r3_save (rfp, j_rarg3_off * wordSize);
5713 const Address r4_save (rfp, j_rarg4_off * wordSize);
5714 const Address r5_save (rfp, j_rarg5_off * wordSize);
5715 const Address r6_save (rfp, j_rarg6_off * wordSize);
5716 const Address r7_save (rfp, j_rarg7_off * wordSize);
5717
5718 // Generate oop map
5719 OopMap* map = new OopMap(framesize, 0);
5720
5721 map->set_callee_saved(VMRegImpl::stack2reg(rfp_off), rfp->as_VMReg());
5722 map->set_callee_saved(VMRegImpl::stack2reg(j_rarg7_off), j_rarg7->as_VMReg());
5723 map->set_callee_saved(VMRegImpl::stack2reg(j_rarg6_off), j_rarg6->as_VMReg());
5724 map->set_callee_saved(VMRegImpl::stack2reg(j_rarg5_off), j_rarg5->as_VMReg());
5725 map->set_callee_saved(VMRegImpl::stack2reg(j_rarg4_off), j_rarg4->as_VMReg());
5726 map->set_callee_saved(VMRegImpl::stack2reg(j_rarg3_off), j_rarg3->as_VMReg());
5727 map->set_callee_saved(VMRegImpl::stack2reg(j_rarg2_off), j_rarg2->as_VMReg());
5728 map->set_callee_saved(VMRegImpl::stack2reg(j_rarg1_off), j_rarg1->as_VMReg());
5729 map->set_callee_saved(VMRegImpl::stack2reg(j_rarg0_off), j_rarg0->as_VMReg());
5730
5731 map->set_callee_saved(VMRegImpl::stack2reg(j_farg0_off), j_farg0->as_VMReg());
5732 map->set_callee_saved(VMRegImpl::stack2reg(j_farg1_off), j_farg1->as_VMReg());
5733 map->set_callee_saved(VMRegImpl::stack2reg(j_farg2_off), j_farg2->as_VMReg());
5734 map->set_callee_saved(VMRegImpl::stack2reg(j_farg3_off), j_farg3->as_VMReg());
5735 map->set_callee_saved(VMRegImpl::stack2reg(j_farg4_off), j_farg4->as_VMReg());
5736 map->set_callee_saved(VMRegImpl::stack2reg(j_farg5_off), j_farg5->as_VMReg());
5737 map->set_callee_saved(VMRegImpl::stack2reg(j_farg6_off), j_farg6->as_VMReg());
5738 map->set_callee_saved(VMRegImpl::stack2reg(j_farg7_off), j_farg7->as_VMReg());
5739
5740 // This is an inlined and slightly modified version of call_VM
5741 // which has the ability to fetch the return PC out of
5742 // thread-local storage and also sets up last_Java_sp slightly
5743 // differently than the real call_VM
5744
5745 __ enter(); // Save FP and LR before call
5746
5747 assert(is_even(framesize/2), "sp not 16-byte aligned");
5748
5749 // lr and fp are already in place
5750 __ sub(sp, rfp, ((unsigned)framesize - 4) << LogBytesPerInt); // prolog
5751
5752 __ strd(j_farg7, f7_save);
5753 __ strd(j_farg6, f6_save);
5754 __ strd(j_farg5, f5_save);
5755 __ strd(j_farg4, f4_save);
5756 __ strd(j_farg3, f3_save);
5757 __ strd(j_farg2, f2_save);
5758 __ strd(j_farg1, f1_save);
5759 __ strd(j_farg0, f0_save);
5760
5761 __ str(j_rarg0, r0_save);
5762 __ str(j_rarg1, r1_save);
5763 __ str(j_rarg2, r2_save);
5764 __ str(j_rarg3, r3_save);
5765 __ str(j_rarg4, r4_save);
5766 __ str(j_rarg5, r5_save);
5767 __ str(j_rarg6, r6_save);
5768 __ str(j_rarg7, r7_save);
5769
5770 int frame_complete = __ pc() - start;
5771
5772 // Set up last_Java_sp and last_Java_fp
5773 address the_pc = __ pc();
5774 __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
5775
5776 // Call runtime
5777 __ mov(c_rarg0, rthread);
5778 __ mov(c_rarg1, r0);
5779
5780 BLOCK_COMMENT("call runtime_entry");
5781 __ mov(rscratch1, destination);
5782 __ blrt(rscratch1, 2 /* number_of_arguments */, 0, 1);
5783
5784 oop_maps->add_gc_map(the_pc - start, map);
5785
5786 __ reset_last_Java_frame(false);
5787 __ maybe_isb();
5788
5789 __ ldrd(j_farg7, f7_save);
5790 __ ldrd(j_farg6, f6_save);
5791 __ ldrd(j_farg5, f5_save);
5792 __ ldrd(j_farg4, f4_save);
5793 __ ldrd(j_farg3, f3_save);
5794 __ ldrd(j_farg3, f2_save);
5795 __ ldrd(j_farg1, f1_save);
5796 __ ldrd(j_farg0, f0_save);
5797
5798 __ ldr(j_rarg0, r0_save);
5799 __ ldr(j_rarg1, r1_save);
5800 __ ldr(j_rarg2, r2_save);
5801 __ ldr(j_rarg3, r3_save);
5802 __ ldr(j_rarg4, r4_save);
5803 __ ldr(j_rarg5, r5_save);
5804 __ ldr(j_rarg6, r6_save);
5805 __ ldr(j_rarg7, r7_save);
5806
5807 __ leave();
5808
5809 // check for pending exceptions
5810 Label pending;
5811 __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
5812 __ cmp(rscratch1, (u1)NULL_WORD);
5813 __ br(Assembler::NE, pending);
5814
5815 if (has_res) {
5816 __ get_vm_result(r0, rthread);
5817 }
5818 __ ret(lr);
5819
5820 __ bind(pending);
5821 __ ldr(r0, Address(rthread, in_bytes(Thread::pending_exception_offset())));
5822 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
5823
5824
5825 // codeBlob framesize is in words (not VMRegImpl::slot_size)
5826 int frame_size_in_words = (framesize >> (LogBytesPerWord - LogBytesPerInt));
5827 RuntimeStub* stub =
5828 RuntimeStub::new_runtime_stub(name, &code, frame_complete, frame_size_in_words, oop_maps, false);
5829
5830 return stub->entry_point();
5831 }
5832
5833 // Initialization
5834 void generate_initial() {
5835 // Generate initial stubs and initializes the entry points
5836
5837 // entry points that exist in all platforms Note: This is code
5838 // that could be shared among different platforms - however the
5839 // benefit seems to be smaller than the disadvantage of having a
5840 // much more complicated generator structure. See also comment in
5841 // stubRoutines.hpp.
5842
5843 StubRoutines::_forward_exception_entry = generate_forward_exception();
5844
5845 StubRoutines::_call_stub_entry =
5846 generate_call_stub(StubRoutines::_call_stub_return_address);
5847
5848 // is referenced by megamorphic call
5849 StubRoutines::_catch_exception_entry = generate_catch_exception();
5850
5851 // Build this early so it's available for the interpreter.
5852 StubRoutines::_throw_StackOverflowError_entry =
5862 StubRoutines::_crc_table_adr = (address)StubRoutines::aarch64::_crc_table;
5863 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5864 }
5865
5866 if (UseCRC32CIntrinsics) {
5867 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
5868 }
5869
5870 // Disabled until JDK-8210858 is fixed
5871 // if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
5872 // StubRoutines::_dlog = generate_dlog();
5873 // }
5874
5875 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
5876 StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false);
5877 }
5878
5879 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
5880 StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true);
5881 }
5882
5883
5884 StubRoutines::_load_value_type_fields_in_regs =
5885 generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::load_value_type_fields_in_regs), "load_value_type_fields_in_regs", false);
5886 StubRoutines::_store_value_type_fields_to_buf =
5887 generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::store_value_type_fields_to_buf), "store_value_type_fields_to_buf", true);
5888 }
5889
5890 void generate_all() {
5891 // support for verify_oop (must happen after universe_init)
5892 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
5893 StubRoutines::_throw_AbstractMethodError_entry =
5894 generate_throw_exception("AbstractMethodError throw_exception",
5895 CAST_FROM_FN_PTR(address,
5896 SharedRuntime::
5897 throw_AbstractMethodError));
5898
5899 StubRoutines::_throw_IncompatibleClassChangeError_entry =
5900 generate_throw_exception("IncompatibleClassChangeError throw_exception",
5901 CAST_FROM_FN_PTR(address,
5902 SharedRuntime::
5903 throw_IncompatibleClassChangeError));
5904
5905 StubRoutines::_throw_NullPointerException_at_call_entry =
5906 generate_throw_exception("NullPointerException at call throw_exception",
5907 CAST_FROM_FN_PTR(address,
|