98 // During deoptimization only the result registers need to be restored,
99 // all the other values have already been extracted.
100 static void restore_result_registers(MacroAssembler* masm);
101
102 // Capture info about frame layout
103 enum layout {
104 fpu_state_off = 0,
105 fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1,
106 // The frame sender code expects that rfp will be in
107 // the "natural" place and will override any oopMap
108 // setting for it. We must therefore force the layout
109 // so that it agrees with the frame sender code.
110 r0_off = fpu_state_off + FPUStateSizeInWords,
111 rfp_off = r0_off + (RegisterImpl::number_of_registers - 2) * RegisterImpl::max_slots_per_register,
112 return_off = rfp_off + RegisterImpl::max_slots_per_register, // slot for return address
113 reg_save_size = return_off + RegisterImpl::max_slots_per_register};
114
115 };
116
117 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
118 #if COMPILER2_OR_JVMCI
119 if (save_vectors) {
120 // Save upper half of vector registers
121 int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register /
122 VMRegImpl::slots_per_word;
123 additional_frame_words += vect_words;
124 }
125 #else
126 assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
127 #endif
128
129 int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
130 reg_save_size * BytesPerInt, 16);
131 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
132 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
133 // The caller will allocate additional_frame_words
134 int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
135 // CodeBlob frame size is in words.
136 int frame_size_in_words = frame_size_in_bytes / wordSize;
137 *total_frame_words = frame_size_in_words;
138
139 // Save Integer and Float registers.
140 __ enter();
141 __ push_CPU_state(save_vectors);
142
143 // Set an oopmap for the call site. This oopmap will map all
144 // oop-registers and debug-info registers as callee-saved. This
145 // will allow deoptimization at this safepoint to find all possible
146 // debug-info recordings, as well as let GC find all oops.
147
148 OopMapSet *oop_maps = new OopMapSet();
149 OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
150
151 for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
152 Register r = as_Register(i);
153 if (r <= rfp && r != rscratch1 && r != rscratch2) {
154 // SP offsets are in 4-byte words.
155 // Register slots are 8 bytes wide, 32 floating-point registers.
156 int sp_offset = RegisterImpl::max_slots_per_register * i +
157 FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers;
158 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
159 r->as_VMReg());
160 }
161 }
162
163 for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
164 FloatRegister r = as_FloatRegister(i);
165 int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) :
166 (FloatRegisterImpl::save_slots_per_register * i);
167 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
168 r->as_VMReg());
169 }
170
171 return oop_map;
172 }
173
// Restore every register saved by save_live_registers() and tear down the
// frame it created. restore_vectors must match the save_vectors value used
// at save time so the stack adjustment agrees with the push.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
#if !COMPILER2_OR_JVMCI
  // Only C2/JVMCI-compiled code ever saves the vector state.
  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
#endif
  __ pop_CPU_state(restore_vectors);
  __ leave();

}
182
// Restore only the result registers (r0 and v0) from the register save area
// and pop the whole save area off the stack.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
  // Restore integer result register
  __ ldr(r0, Address(sp, r0_offset_in_bytes()));

  // Pop all of the register save area off the stack
  // (rounded up to the 16-byte alignment the frame was allocated with).
  __ add(sp, sp, align_up(return_offset_in_bytes(), 16));
}
1825 break; // can't de-handlize until after safepoint check
1826 case T_VOID: break;
1827 case T_LONG: break;
1828 default : ShouldNotReachHere();
1829 }
1830
1831 // Switch thread to "native transition" state before reading the synchronization state.
1832 // This additional state is necessary because reading and testing the synchronization
1833 // state is not atomic w.r.t. GC, as this scenario demonstrates:
1834 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1835 // VM thread changes sync state to synchronizing and suspends threads for GC.
1836 // Thread A is resumed to finish this native method, but doesn't block here since it
// didn't see any synchronization in progress, and escapes.
1838 __ mov(rscratch1, _thread_in_native_trans);
1839
1840 __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
1841
1842 // Force this write out before the read below
1843 __ dmb(Assembler::ISH);
1844
1845 // check for safepoint operation in progress and/or pending suspend requests
1846 Label safepoint_in_progress, safepoint_in_progress_done;
1847 {
1848 __ safepoint_poll_acquire(safepoint_in_progress);
1849 __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
1850 __ cbnzw(rscratch1, safepoint_in_progress);
1851 __ bind(safepoint_in_progress_done);
1852 }
1853
1854 // change thread state
1855 Label after_transition;
1856 __ mov(rscratch1, _thread_in_Java);
1857 __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1858 __ stlrw(rscratch1, rscratch2);
1859 __ bind(after_transition);
1860
1861 Label reguard;
1862 Label reguard_done;
1863 __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
1864 __ cmpw(rscratch1, JavaThread::stack_guard_yellow_reserved_disabled);
2756
2757 // Do the call
2758 __ mov(c_rarg0, rthread);
2759 __ lea(rscratch1, RuntimeAddress(call_ptr));
2760 __ blr(rscratch1);
2761 __ bind(retaddr);
2762
2763 // Set an oopmap for the call site. This oopmap will map all
2764 // oop-registers and debug-info registers as callee-saved. This
2765 // will allow deoptimization at this safepoint to find all possible
2766 // debug-info recordings, as well as let GC find all oops.
2767
2768 oop_maps->add_gc_map( __ pc() - start, map);
2769
2770 Label noException;
2771
2772 __ reset_last_Java_frame(false);
2773
2774 __ maybe_isb();
2775 __ membar(Assembler::LoadLoad | Assembler::LoadStore);
2776
2777 __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2778 __ cbz(rscratch1, noException);
2779
2780 // Exception pending
2781
2782 RegisterSaver::restore_live_registers(masm, save_vectors);
2783
2784 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2785
2786 // No exception case
2787 __ bind(noException);
2788
2789 Label no_adjust, bail;
2790 if (!cause_return) {
2791 // If our stashed return pc was modified by the runtime we avoid touching it
2792 __ ldr(rscratch1, Address(rfp, wordSize));
2793 __ cmp(r20, rscratch1);
2794 __ br(Assembler::NE, no_adjust);
2795
|
98 // During deoptimization only the result registers need to be restored,
99 // all the other values have already been extracted.
100 static void restore_result_registers(MacroAssembler* masm);
101
102 // Capture info about frame layout
103 enum layout {
104 fpu_state_off = 0,
105 fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1,
106 // The frame sender code expects that rfp will be in
107 // the "natural" place and will override any oopMap
108 // setting for it. We must therefore force the layout
109 // so that it agrees with the frame sender code.
110 r0_off = fpu_state_off + FPUStateSizeInWords,
111 rfp_off = r0_off + (RegisterImpl::number_of_registers - 2) * RegisterImpl::max_slots_per_register,
112 return_off = rfp_off + RegisterImpl::max_slots_per_register, // slot for return address
113 reg_save_size = return_off + RegisterImpl::max_slots_per_register};
114
115 };
116
// Emit the register-save prologue (enter + push of the full CPU state,
// optionally including NEON or scalable SVE vector state) and build an
// OopMap describing the stack slot of each saved register, so that GC and
// deoptimization can find all oops and debug-info values.
//
//   additional_frame_words - extra caller-requested words in the frame
//   total_frame_words      - out: total frame size in words (CodeBlob units)
//   save_vectors           - also save the vector registers' upper parts
//                            (only legal for C2/JVMCI-compiled code)
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
  bool use_sve = false;
  int sve_vector_size_in_bytes = 0;
  int sve_vector_size_in_slots = 0;

#ifdef COMPILER2
  // Only C2 knows about scalable vectors; query the actual vector
  // register size from the Matcher (in bytes and in 4-byte slots).
  use_sve = Matcher::supports_scalable_vector();
  sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
  sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
#endif

#if COMPILER2_OR_JVMCI
  if (save_vectors) {
    int vect_words = 0;
    int extra_save_slots_per_register = 0;
    // Save upper half of vector registers
    if (use_sve) {
      // SVE: everything beyond the baseline save_slots_per_register is extra.
      extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
    } else {
      // NEON: fixed-size upper half per register.
      extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
    }
    vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
                 VMRegImpl::slots_per_word;
    additional_frame_words += vect_words;
  }
#else
  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
#endif

  // Total frame size, 16-byte aligned as required by the AArch64 ABI.
  int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
                                     reg_save_size * BytesPerInt, 16);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save Integer and Float registers.
  __ enter();
  __ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  OopMapSet *oop_maps = new OopMapSet();  // NOTE: created but not consumed here
  OopMap* oop_map = new OopMap(frame_size_in_slots, 0);

  for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
    Register r = as_Register(i);
    // Scratch registers are excluded; rfp and above are handled by the
    // frame sender code, which expects rfp in its natural place.
    if (r <= rfp && r != rscratch1 && r != rscratch2) {
      // SP offsets are in 4-byte words.
      // Register slots are 8 bytes wide, 32 floating-point registers.
      int sp_offset = RegisterImpl::max_slots_per_register * i +
                      FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers;
      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
                                r->as_VMReg());
    }
  }

  for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
    FloatRegister r = as_FloatRegister(i);
    int sp_offset = 0;
    if (save_vectors) {
      // Full vector state was pushed: stride by the full per-register slot
      // count (SVE vector length, or fixed NEON register size).
      sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
                            (FloatRegisterImpl::slots_per_neon_register * i);
    } else {
      // Only the baseline save_slots_per_register slots were pushed per register.
      sp_offset = FloatRegisterImpl::save_slots_per_register * i;
    }
    oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
                              r->as_VMReg());
  }

  return oop_map;
}
195
// Restore every register saved by save_live_registers() and tear down the
// frame it created. restore_vectors must match the save_vectors value used
// at save time so the stack adjustment agrees with the push.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
#if !COMPILER2_OR_JVMCI
  // Only C2/JVMCI-compiled code ever saves the vector state.
  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
  __ pop_CPU_state(restore_vectors);
#else
  // C2/JVMCI builds may have pushed scalable (SVE) vector state; pop with
  // the same vector configuration that save_live_registers() pushed with.
  __ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
                   Matcher::scalable_vector_reg_size(T_BYTE));
#endif
  __ leave();

}
207
// Restore only the result registers (r0 and v0) from the register save area
// and pop the whole save area off the stack.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
  // Restore integer result register
  __ ldr(r0, Address(sp, r0_offset_in_bytes()));

  // Pop all of the register save area off the stack
  // (rounded up to the 16-byte alignment the frame was allocated with).
  __ add(sp, sp, align_up(return_offset_in_bytes(), 16));
}
1850 break; // can't de-handlize until after safepoint check
1851 case T_VOID: break;
1852 case T_LONG: break;
1853 default : ShouldNotReachHere();
1854 }
1855
1856 // Switch thread to "native transition" state before reading the synchronization state.
1857 // This additional state is necessary because reading and testing the synchronization
1858 // state is not atomic w.r.t. GC, as this scenario demonstrates:
1859 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1860 // VM thread changes sync state to synchronizing and suspends threads for GC.
1861 // Thread A is resumed to finish this native method, but doesn't block here since it
// didn't see any synchronization in progress, and escapes.
1863 __ mov(rscratch1, _thread_in_native_trans);
1864
1865 __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
1866
1867 // Force this write out before the read below
1868 __ dmb(Assembler::ISH);
1869
1870 if (UseSVE > 0) {
1871 // Make sure that jni code does not change SVE vector length.
1872 __ verify_sve_vector_length();
1873 }
1874
1875 // check for safepoint operation in progress and/or pending suspend requests
1876 Label safepoint_in_progress, safepoint_in_progress_done;
1877 {
1878 __ safepoint_poll_acquire(safepoint_in_progress);
1879 __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
1880 __ cbnzw(rscratch1, safepoint_in_progress);
1881 __ bind(safepoint_in_progress_done);
1882 }
1883
1884 // change thread state
1885 Label after_transition;
1886 __ mov(rscratch1, _thread_in_Java);
1887 __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1888 __ stlrw(rscratch1, rscratch2);
1889 __ bind(after_transition);
1890
1891 Label reguard;
1892 Label reguard_done;
1893 __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
1894 __ cmpw(rscratch1, JavaThread::stack_guard_yellow_reserved_disabled);
2786
2787 // Do the call
2788 __ mov(c_rarg0, rthread);
2789 __ lea(rscratch1, RuntimeAddress(call_ptr));
2790 __ blr(rscratch1);
2791 __ bind(retaddr);
2792
2793 // Set an oopmap for the call site. This oopmap will map all
2794 // oop-registers and debug-info registers as callee-saved. This
2795 // will allow deoptimization at this safepoint to find all possible
2796 // debug-info recordings, as well as let GC find all oops.
2797
2798 oop_maps->add_gc_map( __ pc() - start, map);
2799
2800 Label noException;
2801
2802 __ reset_last_Java_frame(false);
2803
2804 __ maybe_isb();
2805 __ membar(Assembler::LoadLoad | Assembler::LoadStore);
2806
2807 if (UseSVE > 0 && save_vectors) {
2808 // Reinitialize the ptrue predicate register, in case the external runtime
2809 // call clobbers ptrue reg, as we may return to SVE compiled code.
2810 __ reinitialize_ptrue();
2811 }
2812
2813 __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2814 __ cbz(rscratch1, noException);
2815
2816 // Exception pending
2817
2818 RegisterSaver::restore_live_registers(masm, save_vectors);
2819
2820 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2821
2822 // No exception case
2823 __ bind(noException);
2824
2825 Label no_adjust, bail;
2826 if (!cause_return) {
2827 // If our stashed return pc was modified by the runtime we avoid touching it
2828 __ ldr(rscratch1, Address(rfp, wordSize));
2829 __ cmp(r20, rscratch1);
2830 __ br(Assembler::NE, no_adjust);
2831
|