
src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp

rev 60615 : 8231441: Initial SVE backend support
Reviewed-by: adinn, pli
Contributed-by: joshua.zhu@arm.com, yang.zhang@arm.com, ningsheng.jian@arm.com

--- old version ---

  98   // During deoptimization only the result registers need to be restored,
  99   // all the other values have already been extracted.
 100   static void restore_result_registers(MacroAssembler* masm);
 101 
  102   // Capture info about frame layout
 103   enum layout {
 104                 fpu_state_off = 0,
 105                 fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1,
 106                 // The frame sender code expects that rfp will be in
 107                 // the "natural" place and will override any oopMap
 108                 // setting for it. We must therefore force the layout
 109                 // so that it agrees with the frame sender code.
 110                 r0_off = fpu_state_off + FPUStateSizeInWords,
 111                 rfp_off = r0_off + (RegisterImpl::number_of_registers - 2) * RegisterImpl::max_slots_per_register,
 112                 return_off = rfp_off + RegisterImpl::max_slots_per_register,      // slot for return address
 113                 reg_save_size = return_off + RegisterImpl::max_slots_per_register};
 114 
 115 };
 116 
 117 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 118 #if COMPILER2_OR_JVMCI
 119   if (save_vectors) {
 120     // Save upper half of vector registers
 121     int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register /
 122                      VMRegImpl::slots_per_word;
 123     additional_frame_words += vect_words;
 124   }
 125 #else
 126   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 127 #endif
 128 
 129   int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
 130                                      reg_save_size * BytesPerInt, 16);
 131   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 132   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 133   // The caller will allocate additional_frame_words
 134   int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
 135   // CodeBlob frame size is in words.
 136   int frame_size_in_words = frame_size_in_bytes / wordSize;
 137   *total_frame_words = frame_size_in_words;
 138 
 139   // Save Integer and Float registers.
 140   __ enter();
 141   __ push_CPU_state(save_vectors);
 142 
 143   // Set an oopmap for the call site.  This oopmap will map all
 144   // oop-registers and debug-info registers as callee-saved.  This
 145   // will allow deoptimization at this safepoint to find all possible
 146   // debug-info recordings, as well as let GC find all oops.
 147 
 148   OopMapSet *oop_maps = new OopMapSet();
 149   OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
 150 
 151   for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
 152     Register r = as_Register(i);
 153     if (r <= rfp && r != rscratch1 && r != rscratch2) {
 154       // SP offsets are in 4-byte words.
 155       // Register slots are 8 bytes wide; the 32 floating-point registers are saved first.
 156       int sp_offset = RegisterImpl::max_slots_per_register * i +
 157                       FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers;
 158       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
 159                                 r->as_VMReg());
 160     }
 161   }
 162 
 163   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
 164     FloatRegister r = as_FloatRegister(i);
 165     int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) :
 166                                    (FloatRegisterImpl::save_slots_per_register * i);
 167     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
 168                               r->as_VMReg());
 169   }
 170 
 171   return oop_map;
 172 }
 173 
 174 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 175 #if !COMPILER2_OR_JVMCI
 176   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 177 #endif
 178   __ pop_CPU_state(restore_vectors);
 179   __ leave();
 180 
 181 }
 182 
 183 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 184 
 185   // Just restore the result registers. Only used by deoptimization. By
 186   // now any callee save register that needs to be restored to a c2
 187   // caller of the deoptee has been extracted into the vframeArray
 188   // and will be stuffed into the c2i adapter we create for later
 189   // restoration so only result registers need to be restored here.
 190 
 191   // Restore fp result register
 192   __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
 193   // Restore integer result register
 194   __ ldr(r0, Address(sp, r0_offset_in_bytes()));
 195 
 196   // Pop all of the register save area off the stack
 197   __ add(sp, sp, align_up(return_offset_in_bytes(), 16));
 198 }


1825       break; // can't de-handlize until after safepoint check
1826   case T_VOID: break;
1827   case T_LONG: break;
1828   default       : ShouldNotReachHere();
1829   }
1830 
1831   // Switch thread to "native transition" state before reading the synchronization state.
1832   // This additional state is necessary because reading and testing the synchronization
1833   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1834   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1835   //     VM thread changes sync state to synchronizing and suspends threads for GC.
1836   //     Thread A is resumed to finish this native method, but doesn't block here since it
1837   //     didn't see any synchronization in progress, and escapes.
1838   __ mov(rscratch1, _thread_in_native_trans);
1839 
1840   __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
1841 
1842   // Force this write out before the read below
1843   __ dmb(Assembler::ISH);
1844 
1845   // check for safepoint operation in progress and/or pending suspend requests
1846   Label safepoint_in_progress, safepoint_in_progress_done;
1847   {
1848     __ safepoint_poll_acquire(safepoint_in_progress);
1849     __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
1850     __ cbnzw(rscratch1, safepoint_in_progress);
1851     __ bind(safepoint_in_progress_done);
1852   }
1853 
1854   // change thread state
1855   Label after_transition;
1856   __ mov(rscratch1, _thread_in_Java);
1857   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1858   __ stlrw(rscratch1, rscratch2);
1859   __ bind(after_transition);
1860 
1861   Label reguard;
1862   Label reguard_done;
1863   __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
1864   __ cmpw(rscratch1, JavaThread::stack_guard_yellow_reserved_disabled);
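
The comment at lines 1831-1837 explains the ordering requirement behind the dmb: the store of _thread_in_native_trans must become visible to the VM thread before this thread reads the safepoint poll, or the thread could slip past a safepoint that is already in progress. A hedged sketch of that handshake in portable C++ atomics; the variable names and state values here are illustrative, not HotSpot's:

// Illustrative model of the native-return transition, not HotSpot code.
#include <atomic>

enum { _thread_in_native = 4, _thread_in_native_trans = 5 };  // assumed values

std::atomic<int>  thread_state{_thread_in_native};
std::atomic<bool> safepoint_pending{false};  // set by the VM thread

void native_return_transition() {
  // strw + dmb ISH in the generated code: make the state store visible
  // before the poll is read, so the VM thread cannot miss this thread.
  thread_state.store(_thread_in_native_trans, std::memory_order_seq_cst);
  if (safepoint_pending.load(std::memory_order_seq_cst)) {
    // block here until the safepoint operation completes (elided)
  }
}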


2756 
2757   // Do the call
2758   __ mov(c_rarg0, rthread);
2759   __ lea(rscratch1, RuntimeAddress(call_ptr));
2760   __ blr(rscratch1);
2761   __ bind(retaddr);
2762 
2763   // Set an oopmap for the call site.  This oopmap will map all
2764   // oop-registers and debug-info registers as callee-saved.  This
2765   // will allow deoptimization at this safepoint to find all possible
2766   // debug-info recordings, as well as let GC find all oops.
2767 
2768   oop_maps->add_gc_map( __ pc() - start, map);
2769 
2770   Label noException;
2771 
2772   __ reset_last_Java_frame(false);
2773 
2774   __ maybe_isb();
2775   __ membar(Assembler::LoadLoad | Assembler::LoadStore);
2776 
2777   __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2778   __ cbz(rscratch1, noException);
2779 
2780   // Exception pending
2781 
2782   RegisterSaver::restore_live_registers(masm, save_vectors);
2783 
2784   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2785 
2786   // No exception case
2787   __ bind(noException);
2788 
2789   Label no_adjust, bail;
2790   if (!cause_return) {
2791     // If our stashed return pc was modified by the runtime we avoid touching it
2792     __ ldr(rscratch1, Address(rfp, wordSize));
2793     __ cmp(r20, rscratch1);
2794     __ br(Assembler::NE, no_adjust);
2795 

--- new version ---

  98   // During deoptimization only the result registers need to be restored,
  99   // all the other values have already been extracted.
 100   static void restore_result_registers(MacroAssembler* masm);
 101 
  102   // Capture info about frame layout
 103   enum layout {
 104                 fpu_state_off = 0,
 105                 fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1,
 106                 // The frame sender code expects that rfp will be in
 107                 // the "natural" place and will override any oopMap
 108                 // setting for it. We must therefore force the layout
 109                 // so that it agrees with the frame sender code.
 110                 r0_off = fpu_state_off + FPUStateSizeInWords,
 111                 rfp_off = r0_off + (RegisterImpl::number_of_registers - 2) * RegisterImpl::max_slots_per_register,
 112                 return_off = rfp_off + RegisterImpl::max_slots_per_register,      // slot for return address
 113                 reg_save_size = return_off + RegisterImpl::max_slots_per_register};
 114 
 115 };
 116 
 117 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 118   bool use_sve = false;
 119   int sve_vector_size_in_bytes = 0;
 120   int sve_vector_size_in_slots = 0;
 121 
 122 #ifdef COMPILER2
 123   use_sve = Matcher::supports_scalable_vector();
 124   sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
 125   sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
 126 #endif
 127 
 128 #if COMPILER2_OR_JVMCI
 129   if (save_vectors) {
 130     int vect_words = 0;
 131     int extra_save_slots_per_register = 0;
 132     // Save upper half of vector registers
 133     if (use_sve) {
 134       extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
 135     } else {
 136       extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
 137     }
 138     vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
 139                  VMRegImpl::slots_per_word;
 140     additional_frame_words += vect_words;
 141   }
 142 #else
 143   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 144 #endif
 145 
 146   int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
 147                                      reg_save_size * BytesPerInt, 16);
 148   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 149   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 150   // The caller will allocate additional_frame_words
 151   int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
 152   // CodeBlob frame size is in words.
 153   int frame_size_in_words = frame_size_in_bytes / wordSize;
 154   *total_frame_words = frame_size_in_words;
 155 
 156   // Save Integer and Float registers.
 157   __ enter();
 158   __ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
 159 
 160   // Set an oopmap for the call site.  This oopmap will map all
 161   // oop-registers and debug-info registers as callee-saved.  This
 162   // will allow deoptimization at this safepoint to find all possible
 163   // debug-info recordings, as well as let GC find all oops.
 164 
 165   OopMapSet *oop_maps = new OopMapSet();
 166   OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
 167 
 168   for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
 169     Register r = as_Register(i);
 170     if (r <= rfp && r != rscratch1 && r != rscratch2) {
 171       // SP offsets are in 4-byte words.
 172       // Register slots are 8 bytes wide; the 32 floating-point registers are saved first.
 173       int sp_offset = RegisterImpl::max_slots_per_register * i +
 174                       FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers;
 175       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
 176                                 r->as_VMReg());
 177     }
 178   }
 179 
 180   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
 181     FloatRegister r = as_FloatRegister(i);
 182     int sp_offset = 0;
 183     if (save_vectors) {
 184       sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
 185                             (FloatRegisterImpl::slots_per_neon_register * i);
 186     } else {
 187       sp_offset = FloatRegisterImpl::save_slots_per_register * i;
 188     }
 189     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
 190                               r->as_VMReg());
 191   }
 192 
 193   return oop_map;
 194 }
 195 
 196 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 197 #if !COMPILER2_OR_JVMCI
 198   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 199   __ pop_CPU_state(restore_vectors);
 200 #else
 201   __ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
 202                    Matcher::scalable_vector_reg_size(T_BYTE));
 203 #endif
 204   __ leave();
 205 
 206 }
 207 
 208 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 209 
 210   // Just restore the result registers. Only used by deoptimization. By
 211   // now any callee save register that needs to be restored to a c2
 212   // caller of the deoptee has been extracted into the vframeArray
 213   // and will be stuffed into the c2i adapter we create for later
 214   // restoration so only result registers need to be restored here.
 215 
 216   // Restore fp result register
 217   __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
 218   // Restore integer result register
 219   __ ldr(r0, Address(sp, r0_offset_in_bytes()));
 220 
 221   // Pop all of the register save area off the stack
 222   __ add(sp, sp, align_up(return_offset_in_bytes(), 16));
 223 }
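
The save-slot arithmetic in save_live_registers above is the core of the SVE change: with NEON, every vector register needs a fixed two extra slots for its unsaved upper 64 bits, while with SVE the extra-slot count depends on the vector length the hardware reports at runtime. A standalone sketch of the same arithmetic; the constants are the architectural AArch64 values, and the 48-byte SVE length is only an example:

// Sketch of the extra save-slot computation for NEON vs. SVE.
#include <cstdio>

int main() {
  const int number_of_fp_registers  = 32; // v0..v31 / z0..z31
  const int save_slots_per_register = 2;  // low 64 bits = two 4-byte slots
  const int slots_per_word          = 2;  // 8-byte word = two 4-byte slots

  // NEON: 128-bit registers, so the unsaved upper half is two more slots.
  int neon_extra_slots = 2;

  // SVE: vector length is a runtime property, 16..256 bytes in 16-byte steps.
  int sve_vector_size_in_bytes = 48;      // example: 384-bit vectors
  int sve_vector_size_in_slots = sve_vector_size_in_bytes / 4;
  int sve_extra_slots = sve_vector_size_in_slots - save_slots_per_register;

  printf("extra words: neon=%d sve=%d\n",
         number_of_fp_registers * neon_extra_slots / slots_per_word,
         number_of_fp_registers * sve_extra_slots  / slots_per_word);
  return 0;
}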


1850       break; // can't de-handlize until after safepoint check
1851   case T_VOID: break;
1852   case T_LONG: break;
1853   default       : ShouldNotReachHere();
1854   }
1855 
1856   // Switch thread to "native transition" state before reading the synchronization state.
1857   // This additional state is necessary because reading and testing the synchronization
1858   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1859   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1860   //     VM thread changes sync state to synchronizing and suspends threads for GC.
1861   //     Thread A is resumed to finish this native method, but doesn't block here since it
1862   //     didn't see any synchronization in progress, and escapes.
1863   __ mov(rscratch1, _thread_in_native_trans);
1864 
1865   __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
1866 
1867   // Force this write out before the read below
1868   __ dmb(Assembler::ISH);
1869 
1870   if (UseSVE > 0) {
1871     // Make sure that jni code does not change SVE vector length.
1872     __ verify_sve_vector_length();
1873   }
1874 
1875   // check for safepoint operation in progress and/or pending suspend requests
1876   Label safepoint_in_progress, safepoint_in_progress_done;
1877   {
1878     __ safepoint_poll_acquire(safepoint_in_progress);
1879     __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
1880     __ cbnzw(rscratch1, safepoint_in_progress);
1881     __ bind(safepoint_in_progress_done);
1882   }
1883 
1884   // change thread state
1885   Label after_transition;
1886   __ mov(rscratch1, _thread_in_Java);
1887   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1888   __ stlrw(rscratch1, rscratch2);
1889   __ bind(after_transition);
1890 
1891   Label reguard;
1892   Label reguard_done;
1893   __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
1894   __ cmpw(rscratch1, JavaThread::stack_guard_yellow_reserved_disabled);
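
The verify_sve_vector_length() call added at line 1872 guards the return from native code: JNI code is free to change the SVE vector length, but SVE-compiled Java code assumes it stays fixed. In C or C++ the current length can be read with the ACLE intrinsic svcntb(), which returns the vector length in bytes. A hedged sketch of such a check (requires an SVE-enabled compiler, e.g. -march=armv8.2-a+sve; the cached-length variable is illustrative, not HotSpot's):

// Sketch only: asserting the SVE vector length is unchanged.
#include <arm_sve.h>
#include <cassert>
#include <cstdint>

static uint64_t initial_vl_bytes;  // illustrative: captured once at startup

void capture_vector_length() { initial_vl_bytes = svcntb(); }
void verify_vector_length()  { assert(svcntb() == initial_vl_bytes); }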


2786 
2787   // Do the call
2788   __ mov(c_rarg0, rthread);
2789   __ lea(rscratch1, RuntimeAddress(call_ptr));
2790   __ blr(rscratch1);
2791   __ bind(retaddr);
2792 
2793   // Set an oopmap for the call site.  This oopmap will map all
2794   // oop-registers and debug-info registers as callee-saved.  This
2795   // will allow deoptimization at this safepoint to find all possible
2796   // debug-info recordings, as well as let GC find all oops.
2797 
2798   oop_maps->add_gc_map( __ pc() - start, map);
2799 
2800   Label noException;
2801 
2802   __ reset_last_Java_frame(false);
2803 
2804   __ maybe_isb();
2805   __ membar(Assembler::LoadLoad | Assembler::LoadStore);
2806 
2807   if (UseSVE > 0 && save_vectors) {
2808     // Reinitialize the ptrue predicate register, in case the external runtime
2809     // call clobbers ptrue reg, as we may return to SVE compiled code.
2810     __ reinitialize_ptrue();
2811   }
2812 
2813   __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2814   __ cbz(rscratch1, noException);
2815 
2816   // Exception pending
2817 
2818   RegisterSaver::restore_live_registers(masm, save_vectors);
2819 
2820   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2821 
2822   // No exception case
2823   __ bind(noException);
2824 
2825   Label no_adjust, bail;
2826   if (!cause_return) {
2827     // If our stashed return pc was modified by the runtime we avoid touching it
2828     __ ldr(rscratch1, Address(rfp, wordSize));
2829     __ cmp(r20, rscratch1);
2830     __ br(Assembler::NE, no_adjust);
2831 
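
The reinitialize_ptrue() call added at line 2810 restores the all-true governing predicate that SVE-compiled code keeps in a fixed predicate register, since the external runtime call may have clobbered it. For comparison, ACLE code never caches a predicate across calls; it builds one as needed. A sketch of that idiom (illustrative ACLE code, not the MacroAssembler implementation):

// Illustrative ACLE predicate usage, not HotSpot code.
#include <arm_sve.h>

// Sum an array, building a fresh governing predicate each iteration.
float sum(const float* a, int n) {
  svfloat32_t acc = svdup_n_f32(0.0f);
  for (int i = 0; i < n; i += svcntw()) {
    svbool_t pg = svwhilelt_b32(i, n);          // lanes still in range
    acc = svadd_f32_m(pg, acc, svld1_f32(pg, a + i));
  }
  return svaddv_f32(svptrue_b32(), acc);        // horizontal add, all-true
}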

