src/cpu/aarch64/vm/aarch64.ad

 768  public:
 769   // Size of call trampoline stub.
 770   static uint size_call_trampoline() {
 771     return 0; // no call trampolines on this platform
 772   }
 773   
 774   // number of relocations needed by a call trampoline stub
 775   static uint reloc_call_trampoline() { 
 776     return 0; // no call trampolines on this platform
 777   }
 778 };
 779 
 780 class HandlerImpl {
 781 
 782  public:
 783 
 784   static int emit_exception_handler(CodeBuffer &cbuf);
 785   static int emit_deopt_handler(CodeBuffer& cbuf);
 786 
 787   static uint size_exception_handler() {
 788     // count up to 4 movz/n/k instructions and one branch instruction
 789     return 5 * NativeInstruction::instruction_size;
 790   }
 791 
 792   static uint size_deopt_handler() {
 793     // count one adr and one branch instruction
 794     return 2 * NativeInstruction::instruction_size;
 795   }
 796 };
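// A note on the sizes above (an illustrative sketch, not code from this file):
// the 5-instruction exception-handler budget covers the worst case of
// materializing a 64-bit target with movz/movk and branching through a scratch
// register, and the 2-instruction deopt budget covers a pc-relative adr plus a
// direct branch.
//
//   movz rscratch1, #target[15:0]
//   movk rscratch1, #target[31:16], lsl #16
//   movk rscratch1, #target[47:32], lsl #32
//   movk rscratch1, #target[63:48], lsl #48
//   br   rscratch1                              // 5 * instruction_size
//
//   adr  lr, .
//   b    <deopt blob unpack entry>              // 2 * instruction_size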
 797 
 798   bool preceded_by_ordered_load(const Node *barrier);
 799 
 800 %}
 801 
 802 source %{
 803 
 804   // AArch64 has load acquire and store release instructions which we
 805   // use for ordered memory accesses, e.g. for volatiles.  The ideal
 806   // graph generator also inserts memory barriers around volatile
 807   // accesses, and we don't want to generate both barriers and acq/rel
 808   // instructions.  So, when we emit a MemBarAcquire we look back in
 809   // the ideal graph for an ordered load and only emit the barrier if
 810   // we don't find one.
 811 
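// As an illustration of what this check arbitrates between (a sketch, not
// taken from this file), consider a volatile Java load:
//
//   ldar x0, [x1]          // load-acquire; the MemBarAcquire can be elided
//
// versus, if the load were emitted as a plain load, the barrier would have to
// be kept:
//
//   ldr  x0, [x1]
//   dmb  ishld             // fence emitted for the MemBarAcquire node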
 812 bool preceded_by_ordered_load(const Node *barrier) {
 813   Node *x = barrier->lookup(TypeFunc::Parms);
 814 


 842 
 843 int MachCallStaticJavaNode::ret_addr_offset()
 844 {
 845   // call should be a simple bl
 846   // unless this is a method handle invoke in which case it is
 847   // mov(rfp, sp), bl, mov(sp, rfp)
 848   int off = 4;
 849   if (_method_handle_invoke) {
 850     off += 4;
 851   }
 852   return off;
 853 }
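// I.e. for a method handle invoke the return address falls after the bl in the
// wrapped sequence emitted by aarch64_enc_java_handle_call (a sketch):
//
//   +0:  mov rfp, sp
//   +4:  bl  <callee>
//   +8:                      <- return address (off = 4 + 4)
//        mov sp, rfp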
 854 
 855 int MachCallDynamicJavaNode::ret_addr_offset()
 856 {
 857   return 16; // movz, movk, movk, bl
 858 }
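// The 16 bytes correspond to the inline-cache sequence emitted by
// MacroAssembler::ic_call(); a sketch, assuming rscratch2 is this port's
// inline-cache register and that the cached-metadata constant occupies three
// move instructions:
//
//   movz rscratch2, #<cached metadata, bits 0..15>
//   movk rscratch2, #<bits 16..31>, lsl #16
//   movk rscratch2, #<bits 32..47>, lsl #32
//   bl   <entry>                                 // return address = start + 16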
 859 
 860 int MachCallRuntimeNode::ret_addr_offset() {
 861   // for generated stubs the call will be
 862   //   bl(addr)
 863   // for real runtime callouts it will be six instructions
 864   // see aarch64_enc_java_to_runtime
 865   //   adr(rscratch2, retaddr)
 866   //   lea(rscratch1, RuntimeAddress(addr))
 867   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 868   //   blrt rscratch1
 869   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 870   if (cb) {
 871     return NativeInstruction::instruction_size;
 872   } else {
 873     return 6 * NativeInstruction::instruction_size;
 874   }
 875 }
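// How the "six instructions" above add up (the middle figure assumes the lea
// of a RuntimeAddress expands to a three-instruction mov of a 48-bit constant;
// that expansion lives in the macro assembler, not in this file):
//
//   adr  rscratch2, retaddr                      // 1
//   movz/movk/movk rscratch1, #addr              // 2-4  (the lea)
//   stp  zr, rscratch2, [sp, #-2 * wordSize]!    // 5
//   blrt rscratch1                               // 6 -> retaddr follows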
 876 
 877 // Indicate if the safepoint node needs the polling page as an input
 878 
 879 // the shared code plants the oop data at the start of the generated
 880 // code for the safepoint node and that needs to be at the load
 881 // instruction itself. so we cannot plant a mov of the safepoint poll
 882 // address followed by a load. setting this to true means the mov is
 883 // scheduled as a prior instruction. that's better for scheduling
 884 // anyway.
 885 
 886 bool SafePointNode::needs_polling_address_input()
 887 {
 888   return true;
 889 }
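// The resulting poll then has roughly the shape below (registers are
// placeholders; a sketch only): the oop data / relocation must attach to the
// load itself, while the preceding materialization of the polling-page address
// can be scheduled freely among earlier instructions.
//
//   mov  x8, #<polling_page>   // the extra input; scheduled ahead of the poll
//   ldrw wzr, [x8]             // the poll; safepoint oop data attaches here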
 890 
 891 //=============================================================================


1451 {
1452   st->print_cr("# MachUEPNode");
1453   if (UseCompressedClassPointers) {
1454     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1455     if (Universe::narrow_klass_shift() != 0) {
1456       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1457     }
1458   } else {
1459     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
1460   }
1461   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1462   st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
1463 }
1464 #endif
1465 
1466 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1467 {
1468   // This is the unverified entry point.
1469   MacroAssembler _masm(&cbuf);
1470 
 1471   // no need to worry about 4-byte alignment of the br on AArch64
1472   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
1473   Label skip;
1474   // TODO
1475   // can we avoid this skip and still use a reloc?
1476   __ br(Assembler::EQ, skip);
1477   __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1478   __ bind(skip);
1479 }
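// For reference, the emitted unverified entry point reduces to the check
// sketched below; treating rscratch2 as the inline-cache register holding the
// expected klass is an assumption read off the cmp_klass operands above.
//
//   <load klass of j_rarg0; compare against rscratch2>   // cmp_klass
//   b.eq skip
//   b    SharedRuntime::get_ic_miss_stub()
// skip: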
1480 
1481 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1482 {
1483   return MachNode::size(ra_);
1484 }
1485 
1486 // REQUIRED EMIT CODE
1487 
1488 //=============================================================================
1489 
1490 // Emit exception handler code.
1491 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
1492 {
1493   // mov rscratch1 #exception_blob_entry_point
1494   // br rscratch1
1495   // Note that the code buffer's insts_mark is always relative to insts.
1496   // That's why we must use the macroassembler to generate a handler.
1497   MacroAssembler _masm(&cbuf);
1498   address base =
1499   __ start_a_stub(size_exception_handler());
1500   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1501   int offset = __ offset();
1502   __ b(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1503   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1504   __ end_a_stub();
1505   return offset;
1506 }
1507 
1508 // Emit deopt handler code.
1509 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
1510 {
1511   // Note that the code buffer's insts_mark is always relative to insts.
1512   // That's why we must use the macroassembler to generate a handler.
1513   MacroAssembler _masm(&cbuf);
1514   address base =
1515   __ start_a_stub(size_deopt_handler());
1516   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1517   int offset = __ offset();
1518 
1519   __ adr(lr, __ pc());
1520   // should we load this into rscratch1 and use a br?
1521   __ b(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1522 
1523   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1524   __ end_a_stub();
1525   return offset;
1526 }
1527 
1528 // REQUIRED MATCHER CODE
1529 
1530 //=============================================================================
1531 
1532 const bool Matcher::match_rule_supported(int opcode) {
1533 
1534   // TODO 
1535   // identify extra cases that we might want to provide match rules for
1536   // e.g. Op_StrEquals and other intrinsics
1537   if (!has_match_rule(opcode)) {
1538     return false;
1539   }
1540 
1541   return true;  // Per default match rules are supported.


2729   %{
2730      Register sub_reg = as_Register($sub$$reg);
2731      Register super_reg = as_Register($super$$reg);
2732      Register temp_reg = as_Register($temp$$reg);
2733      Register result_reg = as_Register($result$$reg);
2734 
2735      Label miss;
2736      MacroAssembler _masm(&cbuf);
2737      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
2738                                      NULL, &miss,
2739                                      /*set_cond_codes:*/ true);
2740      if ($primary) {
2741        __ mov(result_reg, zr);
2742      }
2743      __ bind(miss);
2744   %}
2745 
2746   enc_class aarch64_enc_java_static_call(method meth) %{
2747     MacroAssembler _masm(&cbuf);
2748 
2749     cbuf.set_insts_mark();
2750     address addr = (address)$meth$$method;
2751     if (!_method) {
2752       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
2753       __ bl(Address(addr, relocInfo::runtime_call_type));
2754     } else if (_optimized_virtual) {
2755       __ bl(Address(addr, relocInfo::opt_virtual_call_type));
2756     } else {
2757       __ bl(Address(addr, relocInfo::static_call_type));
2758     }
2759 
2760     if (_method) {
2761       // Emit stub for static call
2762       CompiledStaticCall::emit_to_interp_stub(cbuf);
2763     }
2764   %}
2765 
2766   enc_class aarch64_enc_java_handle_call(method meth) %{
2767     MacroAssembler _masm(&cbuf);
2768     relocInfo::relocType reloc;
2769 
2770     // RFP is preserved across all calls, even compiled calls.
2771     // Use it to preserve SP.
2772     __ mov(rfp, sp);
2773 
2774     cbuf.set_insts_mark();
2775     address addr = (address)$meth$$method;
2776     if (!_method) {
2777       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
2778       __ bl(Address(addr, relocInfo::runtime_call_type));
2779     } else if (_optimized_virtual) {
2780       __ bl(Address(addr, relocInfo::opt_virtual_call_type));
2781     } else {
2782       __ bl(Address(addr, relocInfo::static_call_type));
2783     }
2784 
2785     if (_method) {
2786       // Emit stub for static call
2787       CompiledStaticCall::emit_to_interp_stub(cbuf);
2788     }
2789 
2790     // now restore sp
2791     __ mov(sp, rfp);
2792   %}
2793 
2794   enc_class aarch64_enc_java_dynamic_call(method meth) %{
2795     MacroAssembler _masm(&cbuf);
2796     __ ic_call((address)$meth$$method);
2797   %}
2798 
2799   enc_class aarch64_enc_call_epilog() %{
2800     MacroAssembler _masm(&cbuf);
2801     if (VerifyStackAtCalls) {
2802       // Check that stack depth is unchanged: find majik cookie on stack
2803       __ call_Unimplemented();
2804     }
2805   %}
2806 
2807   enc_class aarch64_enc_java_to_runtime(method meth) %{
2808     MacroAssembler _masm(&cbuf);
2809 
2810     // some calls to generated routines (arraycopy code) are scheduled
2811     // by C2 as runtime calls. if so we can call them using a br (they
2812     // will be in a reachable segment) otherwise we have to use a blrt
2813     // which loads the absolute address into a register.
2814     address entry = (address)$meth$$method;
2815     CodeBlob *cb = CodeCache::find_blob(entry);
2816     if (cb) {
2817       __ bl(Address(entry));
2818     } else {
2819       int gpcnt;
2820       int fpcnt;
2821       int rtype;
2822       getCallInfo(tf(), gpcnt, fpcnt, rtype);
2823       Label retaddr;
2824       __ adr(rscratch2, retaddr);
2825       __ lea(rscratch1, RuntimeAddress(entry));
2826       // Leave a breadcrumb for JavaThread::pd_last_frame().
2827       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
2828       __ blrt(rscratch1, gpcnt, fpcnt, rtype);
2829       __ bind(retaddr);
2830       __ add(sp, sp, 2 * wordSize);
2831     }
2832   %}
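  // The stp/add pair around the blrt builds and tears down a two-word
  // breadcrumb below sp so JavaThread::pd_last_frame() can find the last Java
  // pc; the slot roles below are an assumption based on that comment:
  //
  //   stp zr, rscratch2, [sp, #-2 * wordSize]!   // sp+wordSize: retaddr
  //                                              // sp+0:        zr placeholder
  //   blrt rscratch1                             // call; returns to retaddr
  //   add  sp, sp, 2 * wordSize                  // pop the breadcrumb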
2833 
2834   enc_class aarch64_enc_rethrow() %{
2835     MacroAssembler _masm(&cbuf);
2836     __ b(RuntimeAddress(OptoRuntime::rethrow_stub()));
2837   %}
2838 
2839   enc_class aarch64_enc_ret() %{
2840     MacroAssembler _masm(&cbuf);
2841     __ ret(lr);
2842   %}
2843 
2844   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
2845     MacroAssembler _masm(&cbuf);
2846     Register target_reg = as_Register($jump_target$$reg);
2847     __ br(target_reg);
2848   %}
2849 
2850   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
2851     MacroAssembler _masm(&cbuf);
2852     Register target_reg = as_Register($jump_target$$reg);
2853     // exception oop should be in r0
2854     // ret addr has been popped into lr
2855     // callee expects it in r3
2856     __ mov(r3, lr);




 768  public:
 769   // Size of call trampoline stub.
 770   static uint size_call_trampoline() {
 771     return 0; // no call trampolines on this platform
 772   }
 773   
 774   // number of relocations needed by a call trampoline stub
 775   static uint reloc_call_trampoline() { 
 776     return 0; // no call trampolines on this platform
 777   }
 778 };
 779 
 780 class HandlerImpl {
 781 
 782  public:
 783 
 784   static int emit_exception_handler(CodeBuffer &cbuf);
 785   static int emit_deopt_handler(CodeBuffer& cbuf);
 786 
 787   static uint size_exception_handler() {
 788     return MacroAssembler::far_branch_size();

 789   }
 790 
 791   static uint size_deopt_handler() {
 792     // count one adr and one far branch instruction
 793     return 4 * NativeInstruction::instruction_size;
 794   }
 795 };
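// The sizes above follow from how far branches are expected to expand (an
// assumption about MacroAssembler::far_jump()/far_branch_size(), which are
// defined elsewhere): a single direct b when the whole code cache fits in the
// +/-128MB branch range, otherwise an address materialization plus an
// indirect branch.
//
//   near case:                       far case:
//     b    <dest>        // 4 bytes    adrp rscratch1, <dest page>
//                                      add  rscratch1, rscratch1, #:lo12:<dest>
//                                      br   rscratch1             // 12 bytes
//
// so size_deopt_handler() = adr (4 bytes) + far branch (up to 12 bytes) = 16.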
 796 
 797   bool preceded_by_ordered_load(const Node *barrier);
 798 
 799 %}
 800 
 801 source %{
 802 
 803   // AArch64 has load acquire and store release instructions which we
 804   // use for ordered memory accesses, e.g. for volatiles.  The ideal
 805   // graph generator also inserts memory barriers around volatile
 806   // accesses, and we don't want to generate both barriers and acq/rel
 807   // instructions.  So, when we emit a MemBarAcquire we look back in
 808   // the ideal graph for an ordered load and only emit the barrier if
 809   // we don't find one.
 810 
 811 bool preceded_by_ordered_load(const Node *barrier) {
 812   Node *x = barrier->lookup(TypeFunc::Parms);
 813 


 841 
 842 int MachCallStaticJavaNode::ret_addr_offset()
 843 {
 844   // call should be a simple bl
 845   // unless this is a method handle invoke in which case it is
 846   // mov(rfp, sp), bl, mov(sp, rfp)
 847   int off = 4;
 848   if (_method_handle_invoke) {
 849     off += 4;
 850   }
 851   return off;
 852 }
 853 
 854 int MachCallDynamicJavaNode::ret_addr_offset()
 855 {
 856   return 16; // movz, movk, movk, bl
 857 }
 858 
 859 int MachCallRuntimeNode::ret_addr_offset() {
 860   // for generated stubs the call will be
 861   //   far_call(addr)
 862   // for real runtime callouts it will be six instructions
 863   // see aarch64_enc_java_to_runtime
 864   //   adr(rscratch2, retaddr)
 865   //   lea(rscratch1, RuntimeAddress(addr))
 866   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
 867   //   blrt rscratch1
 868   CodeBlob *cb = CodeCache::find_blob(_entry_point);
 869   if (cb) {
 870     return MacroAssembler::far_branch_size();
 871   } else {
 872     return 6 * NativeInstruction::instruction_size;
 873   }
 874 }
 875 
 876 // Indicate if the safepoint node needs the polling page as an input
 877 
 878 // the shared code plants the oop data at the start of the generated
 879 // code for the safepoint node and that needs to be at the load
 880 // instruction itself. so we cannot plant a mov of the safepoint poll
 881 // address followed by a load. setting this to true means the mov is
 882 // scheduled as a prior instruction. that's better for scheduling
 883 // anyway.
 884 
 885 bool SafePointNode::needs_polling_address_input()
 886 {
 887   return true;
 888 }
 889 
 890 //=============================================================================


1450 {
1451   st->print_cr("# MachUEPNode");
1452   if (UseCompressedClassPointers) {
1453     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1454     if (Universe::narrow_klass_shift() != 0) {
1455       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1456     }
1457   } else {
1458     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
1459   }
1460   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1461   st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
1462 }
1463 #endif
1464 
1465 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1466 {
1467   // This is the unverified entry point.
1468   MacroAssembler _masm(&cbuf);
1469 

1470   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
1471   Label skip;
1472   // TODO
1473   // can we avoid this skip and still use a reloc?
1474   __ br(Assembler::EQ, skip);
1475   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1476   __ bind(skip);
1477 }
1478 
1479 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1480 {
1481   return MachNode::size(ra_);
1482 }
1483 
1484 // REQUIRED EMIT CODE
1485 
1486 //=============================================================================
1487 
1488 // Emit exception handler code.
1489 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
1490 {
1491   // mov rscratch1 #exception_blob_entry_point
1492   // br rscratch1
1493   // Note that the code buffer's insts_mark is always relative to insts.
1494   // That's why we must use the macroassembler to generate a handler.
1495   MacroAssembler _masm(&cbuf);
1496   address base =
1497   __ start_a_stub(size_exception_handler());
1498   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1499   int offset = __ offset();
1500   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1501   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1502   __ end_a_stub();
1503   return offset;
1504 }
1505 
1506 // Emit deopt handler code.
1507 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
1508 {
1509   // Note that the code buffer's insts_mark is always relative to insts.
1510   // That's why we must use the macroassembler to generate a handler.
1511   MacroAssembler _masm(&cbuf);
1512   address base =
1513   __ start_a_stub(size_deopt_handler());
1514   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1515   int offset = __ offset();
1516 
1517   __ adr(lr, __ pc());
1518   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

1519 
1520   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1521   __ end_a_stub();
1522   return offset;
1523 }
1524 
1525 // REQUIRED MATCHER CODE
1526 
1527 //=============================================================================
1528 
1529 const bool Matcher::match_rule_supported(int opcode) {
1530 
1531   // TODO 
1532   // identify extra cases that we might want to provide match rules for
1533   // e.g. Op_StrEquals and other intrinsics
1534   if (!has_match_rule(opcode)) {
1535     return false;
1536   }
1537 
1538   return true;  // By default, match rules are supported.


2726   %{
2727      Register sub_reg = as_Register($sub$$reg);
2728      Register super_reg = as_Register($super$$reg);
2729      Register temp_reg = as_Register($temp$$reg);
2730      Register result_reg = as_Register($result$$reg);
2731 
2732      Label miss;
2733      MacroAssembler _masm(&cbuf);
2734      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
2735                                      NULL, &miss,
2736                                      /*set_cond_codes:*/ true);
2737      if ($primary) {
2738        __ mov(result_reg, zr);
2739      }
2740      __ bind(miss);
2741   %}
2742 
2743   enc_class aarch64_enc_java_static_call(method meth) %{
2744     MacroAssembler _masm(&cbuf);
2745 

2746     address addr = (address)$meth$$method;
2747     if (!_method) {
2748       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
2749       __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
2750     } else if (_optimized_virtual) {
2751       __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
2752     } else {
2753       __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
2754     }
2755 
2756     if (_method) {
2757       // Emit stub for static call
2758       CompiledStaticCall::emit_to_interp_stub(cbuf);
2759     }
2760   %}
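  // trampoline_call keeps the call site itself a single near bl; when the
  // destination may end up outside bl range it also plants a small trampoline
  // stub that the bl can be re-bound to. The stub shape below is a sketch (its
  // emission is in the macro assembler, not in this excerpt):
  //
  //   bl   <target or trampoline stub>   // always one instruction in line
  //
  //   // out-of-line trampoline stub:
  //   ldr  rscratch1, 0f                 // load 64-bit destination from literal
  //   br   rscratch1
  // 0:
  //   .quad <destination>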
2761 
2762   enc_class aarch64_enc_java_handle_call(method meth) %{
2763     MacroAssembler _masm(&cbuf);
2764     relocInfo::relocType reloc;
2765 
2766     // RFP is preserved across all calls, even compiled calls.
2767     // Use it to preserve SP.
2768     __ mov(rfp, sp);
2769 
2770     const int start_offset = __ offset();
2771     address addr = (address)$meth$$method;
2772     if (!_method) {
2773       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
2774       __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
2775     } else if (_optimized_virtual) {
2776       __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
2777     } else {
2778       __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
2779     }
2780 
2781     if (_method) {
2782       // Emit stub for static call
2783       CompiledStaticCall::emit_to_interp_stub(cbuf);
2784     }
2785 
2786     // now restore sp
2787     __ mov(sp, rfp);
2788   %}
2789 
2790   enc_class aarch64_enc_java_dynamic_call(method meth) %{
2791     MacroAssembler _masm(&cbuf);
2792     __ ic_call((address)$meth$$method);
2793   %}
2794 
2795   enc_class aarch64_enc_call_epilog() %{
2796     MacroAssembler _masm(&cbuf);
2797     if (VerifyStackAtCalls) {
2798       // Check that stack depth is unchanged: find majik cookie on stack
2799       __ call_Unimplemented();
2800     }
2801   %}
2802 
2803   enc_class aarch64_enc_java_to_runtime(method meth) %{
2804     MacroAssembler _masm(&cbuf);
2805 
2806     // some calls to generated routines (arraycopy code) are scheduled
2807     // by C2 as runtime calls. if so we can call them using a br (they
2808     // will be in a reachable segment) otherwise we have to use a blrt
2809     // which loads the absolute address into a register.
2810     address entry = (address)$meth$$method;
2811     CodeBlob *cb = CodeCache::find_blob(entry);
2812     if (cb) {
2813       __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
2814     } else {
2815       int gpcnt;
2816       int fpcnt;
2817       int rtype;
2818       getCallInfo(tf(), gpcnt, fpcnt, rtype);
2819       Label retaddr;
2820       __ adr(rscratch2, retaddr);
2821       __ lea(rscratch1, RuntimeAddress(entry));
2822       // Leave a breadcrumb for JavaThread::pd_last_frame().
2823       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
2824       __ blrt(rscratch1, gpcnt, fpcnt, rtype);
2825       __ bind(retaddr);
2826       __ add(sp, sp, 2 * wordSize);
2827     }
2828   %}
2829 
2830   enc_class aarch64_enc_rethrow() %{
2831     MacroAssembler _masm(&cbuf);
2832     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
2833   %}
2834 
2835   enc_class aarch64_enc_ret() %{
2836     MacroAssembler _masm(&cbuf);
2837     __ ret(lr);
2838   %}
2839 
2840   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
2841     MacroAssembler _masm(&cbuf);
2842     Register target_reg = as_Register($jump_target$$reg);
2843     __ br(target_reg);
2844   %}
2845 
2846   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
2847     MacroAssembler _masm(&cbuf);
2848     Register target_reg = as_Register($jump_target$$reg);
2849     // exception oop should be in r0
2850     // ret addr has been popped into lr
2851     // callee expects it in r3
2852     __ mov(r3, lr);