153
// Generates the interpreter entry point that execution resumes at after a
// call (invoke* bytecode) returns.  'state' is the TosState of the returned
// value, 'step' advances the bcp past the invoke bytecode, and 'index_size'
// is the width of the constant-pool-cache index embedded in the bytecodes.
address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
  address entry = __ pc();

#ifdef COMPILER2
  // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
  if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) {
    // An x87 FP result is in st(0); free only the remaining stack slots.
    for (int i = 1; i < 8; i++) {
      __ ffree(i);
    }
  } else if (UseSSE < 2) {
    // No x87 result to preserve; discard the whole FPU stack.
    __ empty_FPU_stack();
  }
#endif
  // Verify the expected x87 stack depth on entry: 1 when an x87 FP result
  // is present, 0 otherwise.
  if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) {
    __ MacroAssembler::verify_FPU(1, "generate_return_entry_for compiled");
  } else {
    __ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled");
  }

  // In SSE mode, interpreter returns FP results in xmm0 but they need
  // to end up back on the FPU so it can operate on them.
  if (state == ftos && UseSSE >= 1) {
    // Spill xmm0 to the stack and reload it onto the x87 stack.
    __ subptr(rsp, wordSize);
    __ movflt(Address(rsp, 0), xmm0);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, wordSize);
  } else if (state == dtos && UseSSE >= 2) {
    __ subptr(rsp, 2*wordSize);
    __ movdbl(Address(rsp, 0), xmm0);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 2*wordSize);
  }

  // After the transfer above, a float/double result always sits in st(0).
  __ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 1 : 0, "generate_return_entry_for in interpreter");

  // Restore stack bottom in case i2c adjusted stack
  __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize));
  // and NULL it as marker that rsp is now tos until next java call
  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);

  __ restore_bcp();
  __ restore_locals();

  if (state == atos) {
    // Profile the dynamic type of the returned oop (in rax).
    Register mdp = rbx;
    Register tmp = rcx;
    __ profile_return_type(mdp, rax, tmp);
  }

  // Reload the ConstantPoolCache entry of the invoke to find out how many
  // parameter slots the call consumed, and pop them off the expression stack.
  const Register cache = rbx;
  const Register index = rcx;
  __ get_cache_and_index_at_bcp(cache, index, 1, index_size);

  const Register flags = cache;
  __ movl(flags, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
  __ andl(flags, ConstantPoolCacheEntry::parameter_size_mask);
  // rsp += parameter_size * stack element size.
  __ lea(rsp, Address(rsp, flags, Interpreter::stackElementScale()));
  // Continue with the bytecode following the invoke.
  __ dispatch_next(state, step);

  return entry;
}
215
216
// Generates the interpreter entry point used when a frame resumes execution
// after deoptimization.  'state' is the TosState of the value (if any) on
// top of stack; 'step' advances the bcp before re-dispatching.
address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) {
  address entry = __ pc();

  // In SSE mode, FP results are in xmm0
  if (state == ftos && UseSSE > 0) {
    // Spill xmm0 to the stack and reload it onto the x87 stack.
    __ subptr(rsp, wordSize);
    __ movflt(Address(rsp, 0), xmm0);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, wordSize);
  } else if (state == dtos && UseSSE >= 2) {
    __ subptr(rsp, 2*wordSize);
    __ movdbl(Address(rsp, 0), xmm0);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 2*wordSize);
  }

  // A float/double result is expected in st(0) at this point; anything else
  // leaves the x87 stack empty.
  __ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 1 : 0, "generate_deopt_entry_for in interpreter");

  // The stack is not extended by deopt but we must NULL last_sp as this
  // entry is like a "return".
  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
  __ restore_bcp();
  __ restore_locals();
  // handle exceptions
  { Label L;
    const Register thread = rcx;
    __ get_thread(thread);
    __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::zero, L);
    // A pending exception exists: rethrow it; control must not return here.
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
    __ should_not_reach_here();
    __ bind(L);
  }
  __ dispatch_next(state, step);
  return entry;
}
253
254
718 __ bind(slow_path);
719 (void) generate_normal_entry(false);
720
721 return entry;
722 }
723 #endif // INCLUDE_ALL_GCS
724
725 // If G1 is not enabled then attempt to go through the accessor entry point
726 // Reference.get is an accessor
727 return generate_jump_to_normal_entry();
728 }
729
730 /**
731 * Method entry for static native methods:
732 * int java.util.zip.CRC32.update(int crc, int b)
733 */
734 address InterpreterGenerator::generate_CRC32_update_entry() {
735 if (UseCRC32Intrinsics) {
736 address entry = __ pc();
737
// rbx: Method*
// rsi: senderSP must be preserved for slow path, set SP to it on fast path
740 // rdx: scratch
741 // rdi: scratch
742
743 Label slow_path;
744 // If we need a safepoint check, generate full interpreter entry.
745 ExternalAddress state(SafepointSynchronize::address_of_state());
746 __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
747 SafepointSynchronize::_not_synchronized);
748 __ jcc(Assembler::notEqual, slow_path);
749
750 // We don't generate local frame and don't align stack because
751 // we call stub code and there is no safepoint on this path.
752
753 // Load parameters
754 const Register crc = rax; // crc
755 const Register val = rdx; // source java byte value
756 const Register tbl = rdi; // scratch
757
758 // Arguments are reversed on java expression stack
824 }
825
826 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
827 // result in rax
828
829 // _areturn
830 __ pop(rdi); // get return address
831 __ mov(rsp, rsi); // set sp to sender sp
832 __ jmp(rdi);
833
834 // generate a vanilla native entry as the slow path
835 __ bind(slow_path);
836
837 (void) generate_native_entry(false);
838
839 return entry;
840 }
841 return generate_native_entry(false);
842 }
843
844 //
845 // Interpreter stub for calling a native method. (asm interpreter)
846 // This sets up a somewhat different looking stack for calling the native method
847 // than the typical interpreter frame setup.
848 //
849
850 address InterpreterGenerator::generate_native_entry(bool synchronized) {
851 // determine code generation flags
852 bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
853
// rbx: Method*
855 // rsi: sender sp
856 // rsi: previous interpreter state (C++ interpreter) must preserve
857 address entry_point = __ pc();
858
859 const Address constMethod (rbx, Method::const_offset());
860 const Address access_flags (rbx, Method::access_flags_offset());
861 const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset());
862
863 // get parameter size (always needed)
1073 // (if result handler is the T_FLOAT or T_DOUBLE handler, result must be in ST0 -
1074 // the check is necessary to avoid potential Intel FPU overflow problems by saving/restoring 'empty' FPU registers)
1075 // It is safe to do this push because state is _thread_in_native and return address will be found
// via _last_native_pc and not via _last_java_sp
1077
// NOTE: the order of these push(es) is known to frame::interpreter_frame_result.
1079 // If the order changes or anything else is added to the stack the code in
1080 // interpreter_frame_result will have to be changed.
1081
1082 { Label L;
1083 Label push_double;
1084 ExternalAddress float_handler(AbstractInterpreter::result_handler(T_FLOAT));
1085 ExternalAddress double_handler(AbstractInterpreter::result_handler(T_DOUBLE));
1086 __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize),
1087 float_handler.addr());
1088 __ jcc(Assembler::equal, push_double);
1089 __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize),
1090 double_handler.addr());
1091 __ jcc(Assembler::notEqual, L);
1092 __ bind(push_double);
1093 __ push(dtos);
1094 __ bind(L);
1095 }
1096 __ push(ltos);
1097
1098 // change thread state
1099 __ get_thread(thread);
1100 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1101 if(os::is_MP()) {
1102 if (UseMembar) {
1103 // Force this write out before the read below
1104 __ membar(Assembler::Membar_mask_bits(
1105 Assembler::LoadLoad | Assembler::LoadStore |
1106 Assembler::StoreLoad | Assembler::StoreStore));
1107 } else {
1108 // Write serialization page so VM thread can do a pseudo remote membar.
1109 // We use the current thread pointer to calculate a thread specific
1110 // offset to write to within the page. This minimizes bus traffic
1111 // due to cache line collision.
1112 __ serialize_memory(thread, rcx);
1113 }
|
153
// Generates the interpreter entry point that execution resumes at after a
// call (invoke* bytecode) returns.  'state' is the TosState of the returned
// value, 'step' advances the bcp past the invoke bytecode, and 'index_size'
// is the width of the constant-pool-cache index embedded in the bytecodes.
address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
  address entry = __ pc();

#ifdef COMPILER2
  // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
  if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) {
    // An x87 FP result is in st(0); free only the remaining stack slots.
    for (int i = 1; i < 8; i++) {
      __ ffree(i);
    }
  } else if (UseSSE < 2) {
    // No x87 result to preserve; discard the whole FPU stack.
    __ empty_FPU_stack();
  }
#endif
  // Verify the expected x87 stack depth on entry: 1 when an x87 FP result
  // is present, 0 otherwise.
  if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) {
    __ MacroAssembler::verify_FPU(1, "generate_return_entry_for compiled");
  } else {
    __ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled");
  }

  // With SSE the FP result stays in xmm0 and the x87 stack must be empty;
  // without it the result occupies st(0).
  if (state == ftos) {
    __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_return_entry_for in interpreter");
  } else if (state == dtos) {
    __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_return_entry_for in interpreter");
  }

  // Restore stack bottom in case i2c adjusted stack
  __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize));
  // and NULL it as marker that rsp is now tos until next java call
  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);

  __ restore_bcp();
  __ restore_locals();

  if (state == atos) {
    // Profile the dynamic type of the returned oop (in rax).
    Register mdp = rbx;
    Register tmp = rcx;
    __ profile_return_type(mdp, rax, tmp);
  }

  // Reload the ConstantPoolCache entry of the invoke to find out how many
  // parameter slots the call consumed, and pop them off the expression stack.
  const Register cache = rbx;
  const Register index = rcx;
  __ get_cache_and_index_at_bcp(cache, index, 1, index_size);

  const Register flags = cache;
  __ movl(flags, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
  __ andl(flags, ConstantPoolCacheEntry::parameter_size_mask);
  // rsp += parameter_size * stack element size.
  __ lea(rsp, Address(rsp, flags, Interpreter::stackElementScale()));
  // Continue with the bytecode following the invoke.
  __ dispatch_next(state, step);

  return entry;
}
205
206
// Generates the interpreter entry point used when a frame resumes execution
// after deoptimization.  'state' is the TosState of the value (if any) on
// top of stack; 'step' advances the bcp before re-dispatching.
address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) {
  address entry = __ pc();

  // With SSE the FP result stays in xmm0 and the x87 stack must be empty;
  // without it the result occupies st(0).
  if (state == ftos) {
    __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_deopt_entry_for in interpreter");
  } else if (state == dtos) {
    __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_deopt_entry_for in interpreter");
  }

  // The stack is not extended by deopt but we must NULL last_sp as this
  // entry is like a "return".
  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
  __ restore_bcp();
  __ restore_locals();
  // handle exceptions
  { Label L;
    const Register thread = rcx;
    __ get_thread(thread);
    __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::zero, L);
    // A pending exception exists: rethrow it; control must not return here.
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
    __ should_not_reach_here();
    __ bind(L);
  }
  __ dispatch_next(state, step);
  return entry;
}
234
235
699 __ bind(slow_path);
700 (void) generate_normal_entry(false);
701
702 return entry;
703 }
704 #endif // INCLUDE_ALL_GCS
705
706 // If G1 is not enabled then attempt to go through the accessor entry point
707 // Reference.get is an accessor
708 return generate_jump_to_normal_entry();
709 }
710
711 /**
712 * Method entry for static native methods:
713 * int java.util.zip.CRC32.update(int crc, int b)
714 */
715 address InterpreterGenerator::generate_CRC32_update_entry() {
716 if (UseCRC32Intrinsics) {
717 address entry = __ pc();
718
719 // rbx: Method*
// rsi: senderSP must be preserved for slow path, set SP to it on fast path
721 // rdx: scratch
722 // rdi: scratch
723
724 Label slow_path;
725 // If we need a safepoint check, generate full interpreter entry.
726 ExternalAddress state(SafepointSynchronize::address_of_state());
727 __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
728 SafepointSynchronize::_not_synchronized);
729 __ jcc(Assembler::notEqual, slow_path);
730
731 // We don't generate local frame and don't align stack because
732 // we call stub code and there is no safepoint on this path.
733
734 // Load parameters
735 const Register crc = rax; // crc
736 const Register val = rdx; // source java byte value
737 const Register tbl = rdi; // scratch
738
739 // Arguments are reversed on java expression stack
805 }
806
807 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
808 // result in rax
809
810 // _areturn
811 __ pop(rdi); // get return address
812 __ mov(rsp, rsi); // set sp to sender sp
813 __ jmp(rdi);
814
815 // generate a vanilla native entry as the slow path
816 __ bind(slow_path);
817
818 (void) generate_native_entry(false);
819
820 return entry;
821 }
822 return generate_native_entry(false);
823 }
824
825 /**
826 * Method entry for static native method:
827 * java.lang.Float.intBitsToFloat(int bits)
828 */
829 address InterpreterGenerator::generate_Float_intBitsToFloat_entry() {
830 address entry;
831
832 if (UseSSE >= 1) {
833 entry = __ pc();
834
835 // rsi: the sender's SP
836
837 // Skip safepoint check (compiler intrinsic versions of this method
838 // do not perform safepoint checks either).
839
840 // Load 'bits' into xmm0 (interpreter returns results in xmm0)
841 __ movflt(xmm0, Address(rsp, wordSize));
842
843 // Return
844 __ pop(rdi); // get return address
845 __ mov(rsp, rsi); // set rsp to the sender's SP
846 __ jmp(rdi);
847 } else {
848 entry = generate_native_entry(false);
849 }
850
851 return entry;
852 }
853
854 /**
855 * Method entry for static native method:
856 * java.lang.Float.floatToRawIntBits(float value)
857 */
858 address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() {
859 address entry;
860
861 if (UseSSE >= 1) {
862 entry = __ pc();
863
864 // rsi: the sender's SP
865
866 // Skip safepoint check (compiler intrinsic versions of this method
867 // do not perform safepoint checks either).
868
869 // Load the parameter (a floating-point value) into rax.
870 __ movl(rax, Address(rsp, wordSize));
871
872 // Return
873 __ pop(rdi); // get return address
874 __ mov(rsp, rsi); // set rsp to the sender's SP
875 __ jmp(rdi);
876 } else {
877 entry = generate_native_entry(false);
878 }
879
880 return entry;
881 }
882
883
884 /**
885 * Method entry for static native method:
886 * java.lang.Double.longBitsToDouble(long bits)
887 */
888 address InterpreterGenerator::generate_Double_longBitsToDouble_entry() {
889 address entry;
890
891 if (UseSSE >= 2) {
892 entry = __ pc();
893
894 // rsi: the sender's SP
895
896 // Skip safepoint check (compiler intrinsic versions of this method
897 // do not perform safepoint checks either).
898
899 // Load 'bits' into xmm0 (interpreter returns results in xmm0)
900 __ movdbl(xmm0, Address(rsp, wordSize));
901
902 // Return
903 __ pop(rdi); // get return address
904 __ mov(rsp, rsi); // set rsp to the sender's SP
905 __ jmp(rdi);
906 } else {
907 entry = generate_native_entry(false);
908 }
909
910 return entry;
911 }
912
913 /**
914 * Method entry for static native method:
915 * java.lang.Double.doubleToRawLongBits(double value)
916 */
917 address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() {
918 address entry;
919
920 if (UseSSE >= 2) {
921 entry = __ pc();
922
923 // rsi: the sender's SP
924
925 // Skip safepoint check (compiler intrinsic versions of this method
926 // do not perform safepoint checks either).
927
928 // Load the parameter (a floating-point value) into rax.
929 __ movl(rdx, Address(rsp, 2*wordSize));
930 __ movl(rax, Address(rsp, wordSize));
931
932 // Return
933 __ pop(rdi); // get return address
934 __ mov(rsp, rsi); // set rsp to the sender's SP
935 __ jmp(rdi);
936 } else {
937 entry = generate_native_entry(false);
938 }
939
940 return entry;
941 }
942
943 //
944 // Interpreter stub for calling a native method. (asm interpreter)
945 // This sets up a somewhat different looking stack for calling the native method
946 // than the typical interpreter frame setup.
947 //
948
949 address InterpreterGenerator::generate_native_entry(bool synchronized) {
950 // determine code generation flags
951 bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
952
// rbx: Method*
954 // rsi: sender sp
955 // rsi: previous interpreter state (C++ interpreter) must preserve
956 address entry_point = __ pc();
957
958 const Address constMethod (rbx, Method::const_offset());
959 const Address access_flags (rbx, Method::access_flags_offset());
960 const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset());
961
962 // get parameter size (always needed)
1172 // (if result handler is the T_FLOAT or T_DOUBLE handler, result must be in ST0 -
1173 // the check is necessary to avoid potential Intel FPU overflow problems by saving/restoring 'empty' FPU registers)
1174 // It is safe to do this push because state is _thread_in_native and return address will be found
// via _last_native_pc and not via _last_java_sp
1176
// NOTE: the order of these push(es) is known to frame::interpreter_frame_result.
1178 // If the order changes or anything else is added to the stack the code in
1179 // interpreter_frame_result will have to be changed.
1180
1181 { Label L;
1182 Label push_double;
1183 ExternalAddress float_handler(AbstractInterpreter::result_handler(T_FLOAT));
1184 ExternalAddress double_handler(AbstractInterpreter::result_handler(T_DOUBLE));
1185 __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize),
1186 float_handler.addr());
1187 __ jcc(Assembler::equal, push_double);
1188 __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize),
1189 double_handler.addr());
1190 __ jcc(Assembler::notEqual, L);
1191 __ bind(push_double);
1192 __ push_d(); // FP values are returned using the FPU, so push FPU contents (even if UseSSE > 0).
1193 __ bind(L);
1194 }
1195 __ push(ltos);
1196
1197 // change thread state
1198 __ get_thread(thread);
1199 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1200 if(os::is_MP()) {
1201 if (UseMembar) {
1202 // Force this write out before the read below
1203 __ membar(Assembler::Membar_mask_bits(
1204 Assembler::LoadLoad | Assembler::LoadStore |
1205 Assembler::StoreLoad | Assembler::StoreStore));
1206 } else {
1207 // Write serialization page so VM thread can do a pseudo remote membar.
1208 // We use the current thread pointer to calculate a thread specific
1209 // offset to write to within the page. This minimizes bus traffic
1210 // due to cache line collision.
1211 __ serialize_memory(thread, rcx);
1212 }
|