
#define __ masm->

class RegisterSaver {
public:

  // Special registers:
  //            32-bit ARM     64-bit ARM
  //  Rthread:     R10            R28
  //  LR:          R14            R30

  // Rthread is callee saved in the C ABI and never changed by compiled code:
  // no need to save it.

  // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  // The one at LR_offset is a return address that is needed by stack walking.
  // A C2 method uses LR as a standard register, so it may be live when we
  // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  // in case it's live in the method we are coming from.

#ifdef AARCH64

  //
  // On AArch64 the register save area has the following layout:
  //
  // |---------------------|
  // | return address (LR) |
  // | FP                  |
  // |---------------------|
  // | V31                 |
  // | ...                 |
  // | V0                  |
  // |---------------------|
  // | padding             |
  // | R30 (LR live value) |
  // |---------------------|
  // | R27                 |
  // | ...                 |
  // | R0                  |
  // |---------------------| <-- SP
  //

  enum RegisterLayout {
    number_of_saved_gprs = 28,
    number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
    words_per_fpr = ConcreteRegisterImpl::words_per_fpr,

    R0_offset = 0,
    R30_offset = R0_offset + number_of_saved_gprs,
    D0_offset = R30_offset + 2,
    FP_offset = D0_offset + number_of_saved_fprs * words_per_fpr,
    LR_offset = FP_offset + 1,

    reg_save_size = LR_offset + 1,
  };
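
  // For example, with 32 FP registers each saved as a 128-bit quad
  // (number_of_saved_fprs == 32, words_per_fpr == 2), the layout above yields
  // D0_offset == 30, FP_offset == 94, LR_offset == 95 and reg_save_size == 96
  // words: a 768-byte area that keeps SP 16-byte aligned.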

  static const int Rmethod_offset;
  static const int Rtemp_offset;

#else

  enum RegisterLayout {
    fpu_save_size = FloatRegisterImpl::number_of_registers,
#ifndef __SOFTFP__
    D0_offset = 0,
#endif
    R0_offset = fpu_save_size,
    R1_offset,
    R2_offset,
    R3_offset,
    R4_offset,
    R5_offset,
    R6_offset,
#if (FP_REG_NUM != 7)
    // if not saved as FP
    R7_offset,
#endif
    R8_offset,
    R9_offset,
#if (FP_REG_NUM != 11)
    // if not saved as FP
    R11_offset,
#endif
    R12_offset,
    R14_offset,
    FP_offset,
    LR_offset,
    reg_save_size,

    Rmethod_offset = R9_offset,
    Rtemp_offset = R12_offset,
  };

  // all regs but Rthread (R10), FP (R7 or R11), SP and PC
  // (altFP_7_11 is the one among R7 and R11 which is not FP)
#define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
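
  // SAVED_BASE_REGS names 12 registers (R0-R6, R8, R9, R12, R14 and
  // altFP_7_11); together with FP and LR, which are pushed separately, an
  // even number of GPR words is saved, preserving 8-byte stack alignment.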

#endif // AARCH64

  // When LR may be live in the nmethod from which we are coming, lr_saved
  // is true: the caller saved the return address before the call to
  // save_live_registers, and LR contains the live value.

  static OopMap* save_live_registers(MacroAssembler* masm,
                                     int* total_frame_words,
                                     bool lr_saved = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);

};


#ifdef AARCH64
const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding();
const int RegisterSaver::Rtemp_offset   = RegisterSaver::R0_offset + Rtemp->encoding();
#endif // AARCH64


OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
                                           int* total_frame_words,
                                           bool lr_saved) {
  *total_frame_words = reg_save_size;

  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);

#ifdef AARCH64
  assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");

  if (lr_saved) {
    // LR was stashed here, so that jump could use it as a scratch reg
    __ ldr(LR, Address(SP, 0));
    // There are two words on the stack top:
    //  [SP + 0]:        placeholder for FP
    //  [SP + wordSize]: saved return address
    __ str(FP, Address(SP, 0));
  } else {
    __ raw_push(FP, LR);
  }

  __ sub(SP, SP, (reg_save_size - 2) * wordSize);

  for (int i = 0; i < number_of_saved_gprs; i += 2) {
    int offset = R0_offset + i;
    __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
    map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
    map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
  }

  __ str(R30, Address(SP, R30_offset * wordSize));
  map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());

  for (int i = 0; i < number_of_saved_fprs; i += 2) {
    int offset1 = D0_offset + i * words_per_fpr;
    int offset2 = offset1 + words_per_fpr;
    Address base(SP, offset1 * wordSize);
    if (words_per_fpr == 2) {
      // pair of "wide" quad vector registers
      __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
    } else {
      // pair of double vector registers
      __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
    }
    map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
    map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
  }
#else
  if (lr_saved) {
    __ push(RegisterSet(FP));
  } else {
    __ push(RegisterSet(FP) | RegisterSet(LR));
  }
  __ push(SAVED_BASE_REGS);
  if (HaveVFP) {
    if (VM_Version::has_vfp3_32()) {
      __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
    } else {
      if (FloatRegisterImpl::number_of_registers > 32) {
        assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
        __ sub(SP, SP, 32 * wordSize);
      }
    }
    __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
  } else {
    __ sub(SP, SP, fpu_save_size * wordSize);
  }
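
  // At this point the frame matches the 32-bit RegisterLayout: FP registers
  // (if any) at the bottom, the SAVED_BASE_REGS block above them, and FP/LR
  // on top. Record the GPR save slots in the oop map.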

  int i;
  int j = 0;
  for (i = R0_offset; i <= R9_offset; i++) {
    if (j == FP_REG_NUM) {
      // skip the FP register, managed below.
      j++;
    }
    map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
    j++;
  }
  assert(j == R10->encoding(), "must be");
#if (FP_REG_NUM != 11)
  // add R11, if not managed as FP
  map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
#endif
  map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
  if (HaveVFP) {
    for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i += 2) {
      map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
      map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
    }
  }
#endif // AARCH64

  return map;
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
#ifdef AARCH64
  for (int i = 0; i < number_of_saved_gprs; i += 2) {
    __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
  }

  __ ldr(R30, Address(SP, R30_offset * wordSize));

  for (int i = 0; i < number_of_saved_fprs; i += 2) {
    Address base(SP, (D0_offset + i * words_per_fpr) * wordSize);
    if (words_per_fpr == 2) {
      // pair of "wide" quad vector registers
      __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
    } else {
      // pair of double vector registers
      __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
    }
  }

  __ add(SP, SP, (reg_save_size - 2) * wordSize);

  if (restore_lr) {
    __ raw_pop(FP, LR);
  } else {
    __ ldr(FP, Address(SP, 0));
  }
#else
  if (HaveVFP) {
    __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
    if (VM_Version::has_vfp3_32()) {
      __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
    } else {
      if (FloatRegisterImpl::number_of_registers > 32) {
        assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
        __ add(SP, SP, 32 * wordSize);
      }
    }
  } else {
    __ add(SP, SP, fpu_save_size * wordSize);
  }
  __ pop(SAVED_BASE_REGS);
  if (restore_lr) {
    __ pop(RegisterSet(FP) | RegisterSet(LR));
  } else {
    __ pop(RegisterSet(FP));
  }
#endif // AARCH64
}

#ifdef AARCH64

static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
    __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));
  } else {
    __ raw_push(R0, ZR);
  }
}

static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
    __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));
  } else {
    __ raw_pop(R0, ZR);
  }
}

static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
  __ raw_push(R0, R1);
  __ raw_push(R2, R3);
  __ raw_push(R4, R5);
  __ raw_push(R6, R7);

  assert(FPR_PARAMS == 8, "adjust this code");
  assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");

  if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed));
  if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed));
  if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed));
  if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed));
}

static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
  assert(FPR_PARAMS == 8, "adjust this code");
  assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");

  if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed));
  if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed));
  if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed));
  if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed));

  __ raw_pop(R6, R7);
  __ raw_pop(R4, R5);
  __ raw_pop(R2, R3);
  __ raw_pop(R0, R1);
}

#else // AARCH64

static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
#ifdef __ABI_HARD__
  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
    __ sub(SP, SP, 8);
    __ fstd(D0, Address(SP));
    return;
  }
#endif // __ABI_HARD__
  __ raw_push(R0, R1);
}

static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
#ifdef __ABI_HARD__
  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
    __ fldd(D0, Address(SP));
    __ add(SP, SP, 8);
    return;
  }
#endif // __ABI_HARD__
  __ raw_pop(R0, R1);
}

static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
  // R1-R3 arguments need to be saved, but we push 4 registers for better alignment
  __ push(RegisterSet(R0, R3));

#ifdef __ABI_HARD__
  // Preserve the FP arguments: likely not needed if the locking code
  // was carefully conceived,
  // but there is no way to guarantee that
  if (fp_regs_in_arguments) {
    // convert fp_regs_in_arguments to a number of double registers
    int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
    __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
  }
#endif // __ABI_HARD__
}

static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
#ifdef __ABI_HARD__
  if (fp_regs_in_arguments) {
    int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
    __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
  }
#endif // __ABI_HARD__

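  // Restore the integer argument registers; must mirror push_param_registers
  // in reverse (LIFO) order.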
  __ pop(RegisterSet(R0, R3));
}

#endif // AARCH64


// Is vector's size (in bytes) bigger than a size saved by default?
// All vector registers are saved by default on ARM.
bool SharedRuntime::is_wide_vector(int size) {
  return false;
}

size_t SharedRuntime::trampoline_size() {
  return 16;
}

void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
  InlinedAddress dest(destination);
  __ indirect_jump(dest, Rtemp);
  __ bind_literal(dest);
}
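
// The trampoline is an indirect jump through an inlined literal: a
// PC-relative load of the destination, the branch itself, and the
// pointer-sized literal bound after it. 16 bytes is enough for this
// sequence on both 32-bit and 64-bit ARM.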

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "not needed on arm");
#ifdef AARCH64
  int slot = 0; // counted in 32-bit VMReg slots
  int reg = 0;
  int fp_reg = 0;
  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
    case T_INT:
      if (reg < GPR_PARAMS) {
        Register r = as_Register(reg);
        regs[i].set1(r->as_VMReg());
        reg++;
      } else {
        regs[i].set1(VMRegImpl::stack2reg(slot));
        slot += 2;
      }
      break;
    case T_LONG:
      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half");
      // fall through
    case T_ARRAY:
    case T_OBJECT:
    case T_ADDRESS:
      if (reg < GPR_PARAMS) {
        Register r = as_Register(reg);
        regs[i].set2(r->as_VMReg());
        reg++;
      } else {
        regs[i].set2(VMRegImpl::stack2reg(slot));
        slot += 2;
      }
      break;
    case T_FLOAT:
      if (fp_reg < FPR_PARAMS) {
        FloatRegister r = as_FloatRegister(fp_reg);
        regs[i].set1(r->as_VMReg());
        fp_reg++;
      } else {
        regs[i].set1(VMRegImpl::stack2reg(slot));
        slot += 2;
      }
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half");
      if (fp_reg < FPR_PARAMS) {
        FloatRegister r = as_FloatRegister(fp_reg);
        regs[i].set2(r->as_VMReg());
        fp_reg++;
      } else {
        regs[i].set2(VMRegImpl::stack2reg(slot));
        slot += 2;
      }
      break;
    case T_VOID:
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
    }
  }
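  // Note: per AAPCS64 every stack argument occupies at least 8 bytes (a pair
  // of 32-bit VMReg slots), which is why 'slot' always advances by 2 above.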
  return slot;

#else // AARCH64

  int slot = 0;
  int ireg = 0;
#ifdef __ABI_HARD__
  int fp_slot = 0;
  int single_fpr_slot = 0;
#endif // __ABI_HARD__
  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
    case T_INT:
    case T_ARRAY:
    case T_OBJECT:
    case T_ADDRESS:
    case T_METADATA:
#ifndef __ABI_HARD__
    case T_FLOAT:
    case T_DOUBLE:
      assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
      if (fp_slot <= 14) {
        FloatRegister r1 = as_FloatRegister(fp_slot);
        FloatRegister r2 = as_FloatRegister(fp_slot+1);
        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
        fp_slot += 2;
      } else {
        if (slot & 1) slot++;
        regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
        slot += 2;
        single_fpr_slot = 16;
      }
      break;
#endif // __ABI_HARD__
    default:
      ShouldNotReachHere();
    }
  }
  return slot;
#endif // AARCH64
}
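
// As an illustration (not computed by this file): on a 32-bit hard-float
// build, a native signature of (jint, jdouble, jobject) would be assigned
// R0, D0 and R1 by the convention above.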

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {
#ifdef AARCH64
  // C calling convention on AArch64 is good enough.
  return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
#else
#ifdef __SOFTFP__
  // soft float is the same as the C calling convention.
  return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
#endif // __SOFTFP__
  (void) is_outgoing;
  int slot = 0;
  int ireg = 0;
  int freg = 0;
  int single_fpr = 0;

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
    case T_INT:
    case T_ARRAY:
    case T_OBJECT:
    case T_ADDRESS:
        Register r2 = as_Register(ireg + 1);
        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
        ireg += 2;
      } else {
        if (slot & 1) slot++;
        regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
        slot += 2;
        ireg = 4;
      }
      break;
    case T_VOID:
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
    }
  }

  if (slot & 1) slot++;
  return slot;
#endif // AARCH64
}

static void patch_callers_callsite(MacroAssembler *masm) {
  Label skip;

  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
  __ cbz(Rtemp, skip);

#ifdef AARCH64
  push_param_registers(masm, FPR_PARAMS);
  __ raw_push(LR, ZR);
#else
  // Pushing an even number of registers for stack alignment.
  // Selecting R9, which had to be saved anyway for some platforms.
  __ push(RegisterSet(R0, R3) | R9 | LR);
#endif // AARCH64

  __ mov(R0, Rmethod);
  __ mov(R1, LR);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));

#ifdef AARCH64
  __ raw_pop(LR, ZR);
  pop_param_registers(masm, FPR_PARAMS);
#else
  __ pop(RegisterSet(R0, R3) | R9 | LR);
#endif // AARCH64

  __ bind(skip);
}

void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int total_args_passed, int comp_args_on_stack,
                                    const BasicType *sig_bt, const VMRegPair *regs) {
  // TODO: ARM - maybe we can use ldm to load the arguments
  const Register tmp = Rtemp; // avoid erasing R5_mh

  // The next assert may not be needed but is safer. Extra analysis is
  // required if there are not enough free registers and we need to use R5 here.
  assert_different_registers(tmp, R5_mh);

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.
  Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
  __ str(Rmethod, callee_target_addr);

#ifdef AARCH64

  assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
  assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);

  if (comp_args_on_stack) {
    __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes));
  }

  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");

    int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
    Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));

    VMReg r = regs[i].first();
    bool full_word = regs[i].second()->is_valid();

    if (r->is_stack()) {
      if (full_word) {
        __ ldr(tmp, source_addr);
        __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
      } else {
        __ ldr_w(tmp, source_addr);
        __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
      }
    } else if (r->is_Register()) {
      if (full_word) {
        __ ldr(r->as_Register(), source_addr);
      } else {
        __ ldr_w(r->as_Register(), source_addr);
      }
    } else if (r->is_FloatRegister()) {
      if (sig_bt[i] == T_DOUBLE) {
        __ ldr_d(r->as_FloatRegister(), source_addr);
      } else {
        __ ldr_s(r->as_FloatRegister(), source_addr);
      }
    } else {
      assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
    }
  }

  __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
  __ br(tmp);

#else

  assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);

  const Register initial_sp = Rmethod; // temporarily scratched

  // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
  assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);

  __ mov(initial_sp, SP);

  if (comp_args_on_stack) {
    __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
  }
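  // Align SP downward: bic clears the low alignment bits, so the outgoing
  // argument area starts on a StackAlignmentInBytes boundary.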
  __ bic(SP, SP, StackAlignmentInBytes - 1);

  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }
        __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
      }
    } else if (r_1->is_FloatRegister()) {
#ifdef __SOFTFP__
      ShouldNotReachHere();
#endif // __SOFTFP__
      if (!r_2->is_valid()) {
        __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
      } else {
        __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
      }
    } else {
      assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
    }
  }

  // restore Rmethod (scratched for initial_sp)
  __ ldr(Rmethod, callee_target_addr);
  __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));

#endif // AARCH64
}

static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed, int comp_args_on_stack,
                            const BasicType *sig_bt, const VMRegPair *regs,
                            Label& skip_fixup) {
  // TODO: ARM - maybe we can use stm to deoptimize the arguments
  const Register tmp = Rtemp;

  patch_callers_callsite(masm);
  __ bind(skip_fixup);

  __ mov(Rsender_sp, SP); // not yet saved

#ifdef AARCH64

  int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
  if (extraspace) {
    __ sub(SP, SP, extraspace);
  }

  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
    Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));

    VMReg r = regs[i].first();
    bool full_word = regs[i].second()->is_valid();

    if (r->is_stack()) {
      if (full_word) {
        __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
        __ str(tmp, dest_addr);
      } else {
        __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
        __ str_w(tmp, dest_addr);
      }
    } else if (r->is_Register()) {
      if (full_word) {
        __ str(r->as_Register(), dest_addr);
      } else {
        __ str_w(r->as_Register(), dest_addr);
      }
    } else if (r->is_FloatRegister()) {
      if (sig_bt[i] == T_DOUBLE) {
        __ str_d(r->as_FloatRegister(), dest_addr);
      } else {
        __ str_s(r->as_FloatRegister(), dest_addr);
      }
    } else {
      assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
    }
  }

  __ mov(Rparams, SP);

  __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset()));
  __ br(tmp);

#else

  int extraspace = total_args_passed * Interpreter::stackElementSize;
  if (extraspace) {
    __ sub_slow(SP, SP, extraspace);
  }

  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }
    int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (r_1->is_stack()) {
      int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      if (!r_2->is_valid()) {
        __ ldr(tmp, Address(SP, arg_offset));
        __ str(tmp, Address(SP, stack_offset));
      } else {
        __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
        __ str(r_2->as_Register(), Address(SP, stack_offset));
      }
    } else if (r_1->is_FloatRegister()) {
#ifdef __SOFTFP__
      ShouldNotReachHere();
#endif // __SOFTFP__
      if (!r_2->is_valid()) {
        __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
      } else {
        __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
      }
    } else {
      assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
    }
  }

  __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));

#endif // AARCH64
}

AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  const Register receiver       = R0;
  const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
  const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4);

  __ load_klass(receiver_klass, receiver);
  __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
  __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
  __ cmp(receiver_klass, holder_klass);

#ifdef AARCH64
  Label ic_miss;
  __ b(ic_miss, ne);
  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
  __ cbz(Rtemp, skip_fixup);
  __ bind(ic_miss);
  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
#else
  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
  __ cmp(Rtemp, 0, eq);
  __ b(skip_fixup, eq);
  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
#endif // AARCH64

  address c2i_entry = __ pc();
  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}


static int reg2offset_in(VMReg r) {
  // Account for saved FP and LR
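  // (the two words pushed by the wrapper's frame setup; incoming stack
  // arguments therefore start at FP + 2*wordSize)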
  return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}


static void verify_oop_args(MacroAssembler* masm,
  int lock_slot_fp_offset = stack_size - 2 * wordSize -
                            lock_slot_offset * VMRegImpl::stack_slot_size;

  // Unverified entry point
  address start = __ pc();

  // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
  const Register receiver = R0; // see receiverOpr()
  __ load_klass(Rtemp, receiver);
  __ cmp(Rtemp, Ricklass);
  Label verified;

  __ b(verified, eq); // jump over alignment no-ops too
  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
  __ align(CodeEntryAlignment);

  // Verified entry point
  __ bind(verified);
  int vep_offset = __ pc() - start;

#ifdef AARCH64
  // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
  __ nop();
#endif // AARCH64

  if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
    // Object.hashCode and System.identityHashCode can pull the hashCode from
    // the header word once it has been computed, instead of doing a full VM
    // transition.
    Label slow_case;
    const Register obj_reg = R0;

    // Unlike Object.hashCode, System.identityHashCode is a static method and
    // gets the object as an argument instead of the receiver.
    if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
      assert(method->is_static(), "method should be static");
      // return 0 for null reference input, return val = R0 = obj_reg = 0
#ifdef AARCH64
      Label Continue;
      __ cbnz(obj_reg, Continue);
      __ ret();
      __ bind(Continue);
#else
      __ cmp(obj_reg, 0);
      __ bx(LR, eq);
#endif // AARCH64
    }

    __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));

    assert(markOopDesc::unlocked_value == 1, "adjust this code");
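    // The low lock bits of an unlocked markOop are 01; if the unlocked bit
    // is clear the object is stack-locked or has an inflated monitor, so
    // take the slow path.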
    __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);

    if (UseBiasedLocking) {
      assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
      __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
    }

#ifdef AARCH64
    __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
    __ b(slow_case, eq);
    __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
    __ ret();
#else
    __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
    __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
    __ bx(LR, ne);
#endif // AARCH64

    __ bind(slow_case);
  }

  // Bang stack pages
  __ arm_stack_overflow_check(stack_size, Rtemp);

  // Setup frame linkage
  __ raw_push(FP, LR);
  __ mov(FP, SP);
  __ sub_slow(SP, SP, stack_size - 2*wordSize);

  int frame_complete = __ pc() - start;

  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots */);
  const int extra_args = is_static ? 2 : 1;
  int receiver_offset = -1;
  int fp_regs_in_arguments = 0;

  for (i = total_in_args; --i >= 0; ) {
    switch (in_sig_bt[i]) {
    case T_ARRAY:
    case T_OBJECT: {
      VMReg src = in_regs[i].first();
      VMReg dst = out_regs[i + extra_args].first();
      if (src->is_stack()) {
        assert(dst->is_stack(), "must be");
        assert(i != 0, "Incoming receiver is always in a register");
        __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
        __ cmp(Rtemp, 0);
#ifdef AARCH64
        __ add(Rtemp, FP, reg2offset_in(src));
        __ csel(Rtemp, ZR, Rtemp, eq);
#else
        __ add(Rtemp, FP, reg2offset_in(src), ne);
#endif // AARCH64
        __ str(Rtemp, Address(SP, reg2offset_out(dst)));
        int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
      } else {
        int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
        __ str(src->as_Register(), Address(SP, offset));
        map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
        if ((i == 0) && (!is_static)) {
          receiver_offset = offset;
        }
        oop_handle_offset += VMRegImpl::slots_per_word;

#ifdef AARCH64
        __ cmp(src->as_Register(), 0);
        __ add(Rtemp, SP, offset);
        __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
        if (dst->is_stack()) {
          __ str(Rtemp, Address(SP, reg2offset_out(dst)));
        }
#else
        if (dst->is_stack()) {
          __ movs(Rtemp, src->as_Register());
          __ add(Rtemp, SP, offset, ne);
          __ str(Rtemp, Address(SP, reg2offset_out(dst)));
        } else {
          __ movs(dst->as_Register(), src->as_Register());
          __ add(dst->as_Register(), SP, offset, ne);
        }
#endif // AARCH64
      }
      break;
    }

    case T_VOID:
      break;

#ifdef AARCH64
    case T_FLOAT:
    case T_DOUBLE: {
      VMReg src = in_regs[i].first();
      VMReg dst = out_regs[i + extra_args].first();
      if (src->is_stack()) {
        assert(dst->is_stack(), "must be");
        __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
        __ str(Rtemp, Address(SP, reg2offset_out(dst)));
      } else {
        assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
        assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
        fp_regs_in_arguments++;
      }
      break;
    }
#else // AARCH64

#ifdef __SOFTFP__
    case T_DOUBLE:
#endif
    case T_LONG: {
      VMReg src_1 = in_regs[i].first();
      VMReg src_2 = in_regs[i].second();
      VMReg dst_1 = out_regs[i + extra_args].first();
      VMReg dst_2 = out_regs[i + extra_args].second();
#if (ALIGN_WIDE_ARGUMENTS == 0)
      // C convention can mix a register and a stack slot for a
      // 64-bit native argument.

      // Note: the following code should work independently of whether
      // the Java calling convention follows the C convention or whether
      // it aligns 64-bit values.
      if (dst_2->is_Register()) {
        if (src_1->as_Register() != dst_1->as_Register()) {
          assert(src_1->as_Register() != dst_2->as_Register() &&
                 src_2->as_Register() != dst_2->as_Register(), "must be");
      if (src_1->is_stack()) {
        if (dst_1->is_stack()) {
          assert(dst_2->is_stack(), "must be");
          __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
          __ ldr(LR, Address(FP, reg2offset_in(src_2)));
          __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
          __ str(LR, Address(SP, reg2offset_out(dst_2)));
        } else {
          // C2 Java calling convention does not populate S14 and S15, therefore
          // those need to be loaded from the stack here
          __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
          fp_regs_in_arguments += 2;
        }
      } else {
        assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
        fp_regs_in_arguments += 2;
      }
      break;
    }
#endif // __ABI_HARD__
#endif // AARCH64

    default: {
      assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
      VMReg src = in_regs[i].first();
      VMReg dst = out_regs[i + extra_args].first();
      if (src->is_stack()) {
        assert(dst->is_stack(), "must be");
        __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
        __ str(Rtemp, Address(SP, reg2offset_out(dst)));
      } else if (dst->is_stack()) {
        __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
      } else {
        assert(src->is_Register() && dst->is_Register(), "must be");
        __ mov(dst->as_Register(), src->as_Register());
      }
    }
    }
  }

  // Get Klass mirror
  int klass_offset = -1;
  if (is_static) {
    klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
    __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
    __ add(c_rarg1, SP, klass_offset);
    __ str(Rtemp, Address(SP, klass_offset));
    map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
  }

  // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
  int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
  assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
  oop_maps->add_gc_map(pc_offset, map);

#ifndef AARCH64
  // Order the last_Java_pc store with the thread state transition (to _thread_in_native)
  __ membar(MacroAssembler::StoreStore, Rtemp);
#endif // !AARCH64

  // RedefineClasses() tracing support for obsolete method entry
  if (log_is_enabled(Trace, redefine, class, obsolete)) {
#ifdef AARCH64
    __ NOT_TESTED();
#endif
    __ save_caller_save_registers();
    __ mov(R0, Rthread);
    __ mov_metadata(R1, method());
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
    __ restore_caller_save_registers();
  }

  const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
  const Register sync_obj    = AARCH64_ONLY(R21) NOT_AARCH64(R6);
  const Register disp_hdr    = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
  const Register tmp         = AARCH64_ONLY(R23) NOT_AARCH64(R8);

  Label slow_lock, slow_lock_biased, lock_done, fast_lock;
  if (method->is_synchronized()) {
    // The first argument is a handle to the sync object (a class or an instance)
    __ ldr(sync_obj, Address(R1));
    // Remember the handle for the unlocking code
    __ mov(sync_handle, R1);

    if (UseBiasedLocking) {
      __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
    }

    const Register mark = tmp;
#ifdef AARCH64
    __ sub(disp_hdr, FP, lock_slot_fp_offset);
    assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");

    __ ldr(mark, sync_obj);

    // Test if the object is already locked
    assert(markOopDesc::unlocked_value == 1, "adjust this code");
    __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);

    // Check for recursive lock
    // See comments in InterpreterMacroAssembler::lock_object for
    // explanations on the fast recursive locking check.
    __ mov(Rtemp, SP);
    __ sub(Rtemp, mark, Rtemp);
    intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
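    // mask == -(page_size - 3): anding it with (mark - SP) is zero only when
    // the displaced header is less than a page above SP and 4-byte aligned,
    // i.e. it plausibly points into this thread's stack (recursive lock).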
    Assembler::LogicalImmediate imm(mask, false);
    __ ands(Rtemp, Rtemp, imm);
    __ b(slow_lock, ne);

    // Recursive locking: store 0 into a lock record
    __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
    __ b(lock_done);

    __ bind(fast_lock);
    __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));

    __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
#else
    // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
    // That would be acceptable as either the CAS or the slow case path is taken in that case.

    __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
    __ sub(disp_hdr, FP, lock_slot_fp_offset);
    __ tst(mark, markOopDesc::unlocked_value);
    __ b(fast_lock, ne);

    // Check for recursive lock
    // See comments in InterpreterMacroAssembler::lock_object for
    // explanations on the fast recursive locking check.
    // Check independently the low bits and the distance to SP.
    // -1- test low 2 bits
    __ movs(Rtemp, AsmOperand(mark, lsl, 30));
    // -2- test (hdr - SP) if the low two bits are 0
    __ sub(Rtemp, mark, SP, eq);
    __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
    // If still 'eq' then recursive locking OK: set displaced header to 0
    __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
    __ b(lock_done, eq);
    __ b(slow_lock);

    __ bind(fast_lock);
    __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));

    __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
#endif // AARCH64

    __ bind(lock_done);
  }

  // Get JNIEnv*
  __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));

  // Perform the thread state transition
  __ mov(Rtemp, _thread_in_native);
#ifdef AARCH64
  // stlr instruction is used to force all preceding writes to be observed prior to thread state change
  __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
  __ stlr_w(Rtemp, Rtemp2);
#else
  __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
#endif // AARCH64

  // Finally, call the native method
  __ call(method->native_function());

  // Set FPSCR/FPCR to a known state
  if (AlwaysRestoreFPU) {
    __ restore_default_fp_mode();
  }

  // Do a safepoint check while the thread is in the transition state
  InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
  Label call_safepoint_runtime, return_to_java;
  __ mov(Rtemp, _thread_in_native_trans);
  __ ldr_literal(R2, safepoint_state);
  __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));

  // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);

  __ ldr_s32(R2, Address(R2));

    if (UseBiasedLocking) {
      __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
      // disp_hdr may not have been saved on entry with biased locking
      __ sub(disp_hdr, FP, lock_slot_fp_offset);
    }

    // See C1_MacroAssembler::unlock_object() for more comments
    __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
    __ cbz(R2, unlock_done);

    __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);

    __ bind(unlock_done);
  }

  // Set the last Java frame and handle block to zero
  __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
  __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM

#ifdef AARCH64
  __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
  if (CheckJNICalls) {
    __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
  }


  switch (ret_type) {
  case T_BOOLEAN:
    __ tst(R0, 0xff);
    __ cset(R0, ne);
    break;
  case T_CHAR   : __ zero_extend(R0, R0, 16);  break;
  case T_BYTE   : __ sign_extend(R0, R0,  8);  break;
  case T_SHORT  : __ sign_extend(R0, R0, 16);  break;
  case T_INT    : // fall through
  case T_LONG   : // fall through
  case T_VOID   : // fall through
  case T_FLOAT  : // fall through
  case T_DOUBLE : /* nothing to do */          break;
  case T_OBJECT : // fall through
  case T_ARRAY  : break; // See JNIHandles::resolve below
  default:
    ShouldNotReachHere();
  }
#else
  __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
  if (CheckJNICalls) {
    __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
  }
#endif // AARCH64

  // Unbox oop result, e.g. JNIHandles::resolve value in R0.
  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
    __ resolve_jobject(R0,      // value
                       Rtemp,   // tmp1
                       R1_tmp); // tmp2
  }

  // Any exception pending?
  __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
  __ mov(SP, FP);

#ifdef AARCH64
  Label except;
  __ cbnz(Rtemp, except);
  __ raw_pop(FP, LR);
  __ ret();

  __ bind(except);
  // Pop the frame and forward the exception. Rexception_pc contains the return address.
  __ raw_pop(FP, Rexception_pc);
#else
  __ cmp(Rtemp, 0);
  // Pop the frame and return if no exception is pending
  __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
  // Pop the frame and forward the exception. Rexception_pc contains the return address.
  __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
  __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
#endif // AARCH64
  __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);

  // Safepoint operation and/or pending suspend request is in progress.
  // Save the return values and call the runtime function by hand.
  __ bind(call_safepoint_runtime);
  push_result_registers(masm, ret_type);
  __ mov(R0, Rthread);
  __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
  pop_result_registers(masm, ret_type);
  __ b(return_to_java);

  __ bind_literal(safepoint_state);

  // Reguard stack pages. Save native results around a call to the C runtime.
  __ bind(reguard);
  push_result_registers(masm, ret_type);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
  pop_result_registers(masm, ret_type);
  __ b(reguard_done);


    __ b(unlock_done);
  }

  __ flush();
  return nmethod::new_native_nmethod(method,
                                     compile_id,
                                     masm->code(),
                                     vep_offset,
                                     frame_complete,
                                     stack_slots / VMRegImpl::slots_per_word,
                                     in_ByteSize(is_static ? klass_offset : receiver_offset),
                                     in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
                                     oop_maps);
}

// This function returns the adjusted size (in number of words) of a c2i
// adapter activation for use during deoptimization.
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
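  // For example, with one word per stack element, 5 callee locals and
  // 2 callee parameters leave 3 extra words to allocate (rounded up to the
  // stack alignment on AArch64).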
  int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
#ifdef AARCH64
  extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord);
#endif // AARCH64
  return extra_locals_size;
}


uint SharedRuntime::out_preserve_stack_slots() {
  return 0;
}


//------------------------------generate_deopt_blob----------------------------
void SharedRuntime::generate_deopt_blob() {
  ResourceMark rm;
#ifdef AARCH64
  CodeBuffer buffer("deopt_blob", 1024+256, 1);
#else
  CodeBuffer buffer("deopt_blob", 1024, 1024);
#endif
  int frame_size_in_words;
  OopMapSet* oop_maps;
  int reexecute_offset;
  int exception_in_tls_offset;
  int exception_offset;

  MacroAssembler* masm = new MacroAssembler(&buffer);
  Label cont;
  const Register Rkind   = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32-bit
  const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
  const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
  assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);

  address start = __ pc();

  oop_maps = new OopMapSet();
  // LR saved by caller (can be live in a C2 method)

  // A deopt is a case where LR may be live in the C2 nmethod. So it's
  // not possible to call the deopt blob from the nmethod and pass the
  // address of the deopt handler of the nmethod in LR. What happens
  // now is that the caller of the deopt blob pushes the current
  // address so the deopt blob doesn't have to do it. This way LR can
  // be preserved, contains the live value from the nmethod and is
  // saved at R14/R30_offset here.
  OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
  __ mov(Rkind, Deoptimization::Unpack_deopt);
  __ b(cont);

  exception_offset = __ pc() - start;

  // assert that exception_pc is zero in tls
  { Label L;
    __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
    __ cbz(Rexception_pc, L);
    __ stop("exception pc should be null");
    __ bind(L);
  }
#endif
  __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
  __ verify_oop(Rexception_obj);
  {
    const Register Rzero = __ zero_register(Rtemp);
    __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
  }

  __ bind(noException);

  // This frame is going away. Fetch the return value, so we can move it to
  // a new frame.
  __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
#ifndef AARCH64
  __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
#endif // !AARCH64
#ifndef __SOFTFP__
  __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
#endif
  // pop frame
  __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);

  // Set the initial stack state before pushing interpreter frames
  __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
  __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
  __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));

#ifdef AARCH64
  // Pop the deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
  // They are needed for correct stack walking during stack overflow handling.
  // Also, the restored FP is saved in the bottom interpreter frame (LR is reloaded from the unroll block).
  __ sub(Rtemp, Rtemp, 2*wordSize);
  __ add(SP, SP, Rtemp, ex_uxtx);
  __ raw_pop(FP, LR);

#ifdef ASSERT
  { Label L;
    __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
    __ cmp(FP, Rtemp);
    __ b(L, eq);
    __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
    __ bind(L);
  }
  { Label L;
    __ ldr(Rtemp, Address(R2));
    __ cmp(LR, Rtemp);
    __ b(L, eq);
    __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
    __ bind(L);
  }
#endif // ASSERT

#else
  __ add(SP, SP, Rtemp);
#endif // AARCH64

#ifdef ASSERT
  // Compilers generate code that bangs the stack by as much as the
  // interpreter would need. So this stack banging should never
  // trigger a fault. Verify that it does not on non-product builds.
  // See if there is enough stack to push the deoptimized frames.
  if (UseStackBanging) {
#ifndef AARCH64
    // The compiled method that we are deoptimizing was popped from the stack.
    // If the stack bang results in a stack overflow, we don't return to the
    // method that is being deoptimized. The stack overflow exception is
    // propagated to the caller of the deoptimized method. Need to get the pc
    // from the caller in LR and restore FP.
    __ ldr(LR, Address(R2, 0));
    __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
#endif // !AARCH64
    __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
    __ arm_stack_overflow_check(R8, Rtemp);
  }
#endif // ASSERT
  __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));

#ifndef AARCH64
  // Pick up the initial fp we should save
  // XXX Note: was ldr(FP, Address(FP));

  // The compiler no longer uses FP as a frame pointer for the
  // compiled code. It can be used by the allocator in C2 or to
  // memorize the original SP for JSR292 call sites.

  // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
  // Deoptimization::fetch_unroll_info computes the right FP value and
  // stores it in Rublock.initial_info. This has been activated for ARM.
  __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
#endif // !AARCH64

  __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
  __ mov(Rsender, SP);
#ifdef AARCH64
  __ sub(SP, SP, Rtemp, ex_uxtx);
#else
  __ sub(SP, SP, Rtemp);
#endif // AARCH64

  // Push interpreter frames in a loop
  Label loop;
  __ bind(loop);
  __ ldr(LR, Address(R2, wordSize, post_indexed));     // load frame pc
  __ ldr(Rtemp, Address(R3, wordSize, post_indexed));  // load frame size

  __ raw_push(FP, LR);                                 // create new frame
  __ mov(FP, SP);
  __ sub(Rtemp, Rtemp, 2*wordSize);

#ifdef AARCH64
  __ sub(SP, SP, Rtemp, ex_uxtx);
#else
  __ sub(SP, SP, Rtemp);
#endif // AARCH64

  __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
#ifdef AARCH64
  __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
#else
  __ mov(LR, 0);
  __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
#endif // AARCH64

  __ subs(R8, R8, 1);                                  // decrement counter
  __ mov(Rsender, SP);
  __ b(loop, ne);

  // Re-push self-frame
  __ ldr(LR, Address(R2));
  __ raw_push(FP, LR);
  __ mov(FP, SP);
  __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);

  // Restore frame locals after moving the frame
  __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
#ifndef AARCH64
  __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
#endif // !AARCH64

#ifndef __SOFTFP__
  __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
#endif // !__SOFTFP__

#ifndef AARCH64
#ifdef ASSERT
  // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
  { Label L;
    __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
    __ cmp_32(Rkind, Rtemp);
    __ b(L, eq);
    __ stop("Rkind was overwritten");
    __ bind(L);
  }
#endif // ASSERT
#endif // !AARCH64

  // Call unpack_frames with the proper arguments
  __ mov(R0, Rthread);
  __ mov(R1, Rkind);

  pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
  assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
  __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
  if (pc_offset == -1) {
    pc_offset = __ offset();
  }
  oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
  __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call

  // Collect return values, pop self-frame and jump to the interpreter
  __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
#ifndef AARCH64
  __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
#endif // !AARCH64
  // Interpreter floats are controlled by __SOFTFP__, but the compiler's
  // float return value registers are controlled by __ABI_HARD__.
  // This matters for vfp-sflt builds.
#ifndef __SOFTFP__
  // Interpreter hard float
#ifdef __ABI_HARD__
  // Compiler float return value in FP registers
  __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
#else
  // Compiler float return value in integer registers,
  // copy to D0 for the interpreter (S0 <-- R0)
  __ fmdrr(D0_tos, R0, R1);
#endif
#endif // !__SOFTFP__
  __ mov(SP, FP);

#ifdef AARCH64
  __ raw_pop(FP, LR);
  __ ret();
#else
  __ pop(RegisterSet(FP) | RegisterSet(PC));
#endif // AARCH64

  __ flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
                                           reexecute_offset, frame_size_in_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
2161
2162 #ifdef COMPILER2
2163
2164 //------------------------------generate_uncommon_trap_blob--------------------
2165 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
2166 // instead.
2167 void SharedRuntime::generate_uncommon_trap_blob() {
2168 // allocate space for the code
2169 ResourceMark rm;
2170
2171 // setup code generation tools
2172 int pad = VerifyThread ? 512 : 0;
2173 #ifdef _LP64
2174 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
2175 #else
2176 // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
2177 // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
2178 CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
2179 #endif
2180 // bypassed when code generation useless
2181 MacroAssembler* masm = new MacroAssembler(&buffer);
2182 const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
2183 const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
2184 assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
2185
2186 //
2187 // This is the entry point for all traps the compiler takes when it thinks
2188 // it cannot handle further execution of compilation code. The frame is
2189 // deoptimized in these cases and converted into interpreter frames for
2190 // execution
2191 // The steps taken by this frame are as follows:
2192 // - push a fake "unpack_frame"
2193 // - call the C routine Deoptimization::uncommon_trap (this function
2194 // packs the current compiled frame into vframe arrays and returns
2195 // information about the number and size of interpreter frames which
2196 // are equivalent to the frame which is being deoptimized)
2197 // - deallocate the "unpack_frame"
2198 // - deallocate the deoptimization frame
2199 // - in a loop using the information returned in the previous step
2200 // push interpreter frames;
2201 // - create a dummy "unpack_frame"
2202 // - call the C routine: Deoptimization::unpack_frames (this function
2203 // lays out values on the interpreter frame which was just created)
2219 __ mov(Rublock, R0);
2220 __ reset_last_Java_frame(Rtemp);
2221 __ raw_pop(FP, LR);
2222
2223 #ifdef ASSERT
2224 { Label L;
2225 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2226 __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
2227 __ b(L, eq);
2228 __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
2229 __ bind(L);
2230 }
2231 #endif
2232
2233
2234 // Set initial stack state before pushing interpreter frames
2235 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2236 __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2237 __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2238
2239 #ifdef AARCH64
2240 // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
2241 // They are needed for correct stack walking during stack overflow handling.
2242 // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
2243 __ sub(Rtemp, Rtemp, 2*wordSize);
2244 __ add(SP, SP, Rtemp, ex_uxtx);
2245 __ raw_pop(FP, LR);
2246
2247 #ifdef ASSERT
2248 { Label L;
2249 __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2250 __ cmp(FP, Rtemp);
2251 __ b(L, eq);
2252 __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2253 __ bind(L);
2254 }
2255 { Label L;
2256 __ ldr(Rtemp, Address(R2));
2257 __ cmp(LR, Rtemp);
2258 __ b(L, eq);
2259 __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2260 __ bind(L);
2261 }
2262 #endif // ASSERT
2263
2264 #else
2265 __ add(SP, SP, Rtemp);
2266 #endif //AARCH64
2267
2268   // See if there is enough stack to push the deoptimized frames
2269 #ifdef ASSERT
2270   // Compilers generate code that bangs the stack by as much as the
2271   // interpreter would need. So this stack banging should never
2272   // trigger a fault. Verify that it does not on non-product builds.
2273 if (UseStackBanging) {
2274 #ifndef AARCH64
2275 // The compiled method that we are deoptimizing was popped from the stack.
2276 // If the stack bang results in a stack overflow, we don't return to the
2277 // method that is being deoptimized. The stack overflow exception is
2278 // propagated to the caller of the deoptimized method. Need to get the pc
2279 // from the caller in LR and restore FP.
2280 __ ldr(LR, Address(R2, 0));
2281 __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2282 #endif // !AARCH64
2283 __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2284 __ arm_stack_overflow_check(R8, Rtemp);
2285 }
2286 #endif
2287 __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2288 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2289 __ mov(Rsender, SP);
2290 #ifdef AARCH64
2291 __ sub(SP, SP, Rtemp, ex_uxtx);
2292 #else
2293 __ sub(SP, SP, Rtemp);
2294 #endif
2295 #ifndef AARCH64
2296 // __ ldr(FP, Address(FP));
2297 __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2298 #endif // !AARCH64
2299
2300 // Push interpreter frames in a loop
2301 Label loop;
2302 __ bind(loop);
2303 __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc
2304 __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size
2305
2306 __ raw_push(FP, LR); // create new frame
2307 __ mov(FP, SP);
2308 __ sub(Rtemp, Rtemp, 2*wordSize);
2309
2310 #ifdef AARCH64
2311 __ sub(SP, SP, Rtemp, ex_uxtx);
2312 #else
2313 __ sub(SP, SP, Rtemp);
2314 #endif // AARCH64
2315
2316 __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2317 #ifdef AARCH64
2318 __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2319 #else
2320 __ mov(LR, 0);
2321 __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2322 #endif // AARCH64
2323 __ subs(R8, R8, 1); // decrement counter
2324 __ mov(Rsender, SP);
2325 __ b(loop, ne);
2326
2327 // Re-push self-frame
2328 __ ldr(LR, Address(R2));
2329 __ raw_push(FP, LR);
2330 __ mov(FP, SP);
2331
2332 // Call unpack_frames with proper arguments
2333 __ mov(R0, Rthread);
2334 __ mov(R1, Deoptimization::Unpack_uncommon_trap);
2335 __ set_last_Java_frame(SP, FP, false, Rtemp);
2336 __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2337 // oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
2338 __ reset_last_Java_frame(Rtemp);
2339
2340 __ mov(SP, FP);
2341 #ifdef AARCH64
2342 __ raw_pop(FP, LR);
2343 __ ret();
2344 #else
2345 __ pop(RegisterSet(FP) | RegisterSet(PC));
2346 #endif
2347
2348 masm->flush();
2349 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
2350 }
2351
2352 #endif // COMPILER2
2353
2354 //------------------------------generate_handler_blob------
2355 //
2356 // Generate a special Compile2Runtime blob that saves all registers,
2357 // sets up the oopmap, and calls the safepoint code to stop compiled
2358 // code at a safepoint.
2359 //
2360 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2361 assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2362
2363 ResourceMark rm;
2364 CodeBuffer buffer("handler_blob", 256, 256);
2365 int frame_size_words;
2366 OopMapSet* oop_maps;
2367
2368 bool cause_return = (poll_type == POLL_AT_RETURN);
2369
2370 MacroAssembler* masm = new MacroAssembler(&buffer);
2371 address start = __ pc();
2372 oop_maps = new OopMapSet();
2373
2374 if (!cause_return) {
2375 #ifdef AARCH64
2376 __ raw_push(LR, LR);
2377 #else
2378 __ sub(SP, SP, 4); // make room for LR which may still be live
2379 // here if we are coming from a c2 method
2380 #endif // AARCH64
2381 }
2382
2383 OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
2384 if (!cause_return) {
2385 // update saved PC with correct value
2386 // need 2 steps because LR can be live in c2 method
2387 __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
2388 __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
2389 }
2390
2391 __ mov(R0, Rthread);
2392 int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
2393 assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2394 __ call(call_ptr);
2395 if (pc_offset == -1) {
2396 pc_offset = __ offset();
2397 }
2398 oop_maps->add_gc_map(pc_offset, map);
2399 __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2400
2401 // Check for pending exception
2402 __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
2403 __ cmp(Rtemp, 0);
2404
2405 #ifdef AARCH64
2406 RegisterSaver::restore_live_registers(masm, cause_return);
2407 Register ret_addr = cause_return ? LR : Rtemp;
2408 if (!cause_return) {
2409 __ raw_pop(FP, ret_addr);
2410 }
2411
2412 Label throw_exception;
2413 __ b(throw_exception, ne);
2414 __ br(ret_addr);
2415
2416 __ bind(throw_exception);
2417 __ mov(Rexception_pc, ret_addr);
2418 #else // AARCH64
2419 if (!cause_return) {
2420 RegisterSaver::restore_live_registers(masm, false);
2421 __ pop(PC, eq);
2422 __ pop(Rexception_pc);
2423 } else {
2424 RegisterSaver::restore_live_registers(masm);
2425 __ bx(LR, eq);
2426 __ mov(Rexception_pc, LR);
2427 }
2428 #endif // AARCH64
2429
2430 __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2431
2432 __ flush();
2433
2434 return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
2435 }
2436
2437 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2438 assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2439
2440 ResourceMark rm;
2441 CodeBuffer buffer(name, 1000, 512);
2442 int frame_size_words;
2443 OopMapSet *oop_maps;
2444 int frame_complete;
2445
2446 MacroAssembler* masm = new MacroAssembler(&buffer);
2447 Label pending_exception;
2448
45
46 #define __ masm->
47
48 class RegisterSaver {
49 public:
50
51 // Special registers:
52 // 32-bit ARM 64-bit ARM
53 // Rthread: R10 R28
54 // LR: R14 R30
55
56 // Rthread is callee saved in the C ABI and never changed by compiled code:
57 // no need to save it.
58
59   // 2 slots for LR: the one at LR_offset and another one at R14_offset.
60   // The one at LR_offset is a return address that is needed by stack walking.
61   // A c2 method uses LR as a standard register so it may be live when we
62   // branch to the runtime. The slot at R14_offset is for the value of LR
63 // in case it's live in the method we are coming from.
64
65
66 enum RegisterLayout {
67 fpu_save_size = FloatRegisterImpl::number_of_registers,
68 #ifndef __SOFTFP__
69 D0_offset = 0,
70 #endif
71 R0_offset = fpu_save_size,
72 R1_offset,
73 R2_offset,
74 R3_offset,
75 R4_offset,
76 R5_offset,
77 R6_offset,
78 #if (FP_REG_NUM != 7)
79 // if not saved as FP
80 R7_offset,
81 #endif
82 R8_offset,
83 R9_offset,
84 #if (FP_REG_NUM != 11)
85 // if not saved as FP
86 R11_offset,
87 #endif
88 R12_offset,
89 R14_offset,
90 FP_offset,
91 LR_offset,
92 reg_save_size,
93
94 Rmethod_offset = R9_offset,
95 Rtemp_offset = R12_offset,
96 };
97
98 // all regs but Rthread (R10), FP (R7 or R11), SP and PC
99   // (altFP_7_11 is the one among R7 and R11 which is not FP)
100 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
101
102
103   // When LR may be live in the nmethod from which we are coming,
104   // lr_saved is true: the return address is saved by the caller before
105   // the call to save_live_registers, and LR contains the
106   // live value.
107
108 static OopMap* save_live_registers(MacroAssembler* masm,
109 int* total_frame_words,
110 bool lr_saved = false);
111 static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
112
113 };
114
115
116
117
118 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
119 int* total_frame_words,
120 bool lr_saved) {
121 *total_frame_words = reg_save_size;
122
123 OopMapSet *oop_maps = new OopMapSet();
124 OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
125
126 if (lr_saved) {
127 __ push(RegisterSet(FP));
128 } else {
129 __ push(RegisterSet(FP) | RegisterSet(LR));
130 }
131 __ push(SAVED_BASE_REGS);
132 if (HaveVFP) {
133 if (VM_Version::has_vfp3_32()) {
134 __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
135 } else {
136 if (FloatRegisterImpl::number_of_registers > 32) {
137 assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
138 __ sub(SP, SP, 32 * wordSize);
139 }
140 }
141 __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
142 } else {
143 __ sub(SP, SP, fpu_save_size * wordSize);
144 }
145
146   int i;
147   int j = 0;
148   for (i = R0_offset; i <= R9_offset; i++) {
149 if (j == FP_REG_NUM) {
150 // skip the FP register, managed below.
151 j++;
152 }
153 map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
154 j++;
155 }
156 assert(j == R10->encoding(), "must be");
157 #if (FP_REG_NUM != 11)
158 // add R11, if not managed as FP
159 map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
160 #endif
161 map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
162 map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
163 if (HaveVFP) {
164 for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
165 map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
166 map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
167 }
168 }
169
170 return map;
171 }
172
173 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
174 if (HaveVFP) {
175 __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
176 if (VM_Version::has_vfp3_32()) {
177 __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
178 } else {
179 if (FloatRegisterImpl::number_of_registers > 32) {
180 assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
181 __ add(SP, SP, 32 * wordSize);
182 }
183 }
184 } else {
185 __ add(SP, SP, fpu_save_size * wordSize);
186 }
187 __ pop(SAVED_BASE_REGS);
188 if (restore_lr) {
189 __ pop(RegisterSet(FP) | RegisterSet(LR));
190 } else {
191 __ pop(RegisterSet(FP));
192 }
193 }
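// Note: save and restore are exact mirrors, so the layout of the save area
// matches the RegisterLayout enum above: D0..D15 at the lowest addresses
// (with D16..D31 just above them when has_vfp3_32()), then SAVED_BASE_REGS,
// and finally FP and (when saved here) LR at the top of the area.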
194
195
196 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
197 #ifdef __ABI_HARD__
198 if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
199 __ sub(SP, SP, 8);
200 __ fstd(D0, Address(SP));
201 return;
202 }
203 #endif // __ABI_HARD__
204 __ raw_push(R0, R1);
205 }
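// push_result_registers/pop_result_registers are used in pairs to preserve a
// native call's return value (R0:R1, or D0 under __ABI_HARD__) across a
// runtime call, as in the safepoint path of the native wrapper below:
//   push_result_registers(masm, ret_type);
//   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
//   pop_result_registers(masm, ret_type);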
206
207 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
208 #ifdef __ABI_HARD__
209 if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
210 __ fldd(D0, Address(SP));
211 __ add(SP, SP, 8);
212 return;
213 }
214 #endif // __ABI_HARD__
215   __ raw_pop(R0, R1);
216 }
217
218
219 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
220   __ push(RegisterSet(R0, R3));
221
222 #ifdef __ABI_HARD__
223   // Preserve the FP argument registers. Ideally only the live ones
224   // would be saved,
225   // but there is no way to guarantee that
226 if (fp_regs_in_arguments) {
227 // convert fp_regs_in_arguments to a number of double registers
228 int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
229 __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
230 }
231 #endif // __ABI_HARD__
232 }
233
234 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
235 #ifdef __ABI_HARD__
236 if (fp_regs_in_arguments) {
237 int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
238 __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
239 }
240 #endif // __ABI_HARD__
241
242 __ pop(RegisterSet(R0, R3));
243 }
244
245
246
247 // Is vector's size (in bytes) bigger than a size saved by default?
248 // All vector registers are saved by default on ARM.
249 bool SharedRuntime::is_wide_vector(int size) {
250 return false;
251 }
252
253 size_t SharedRuntime::trampoline_size() {
254 return 16;
255 }
256
257 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
258 InlinedAddress dest(destination);
259 __ indirect_jump(dest, Rtemp);
260 __ bind_literal(dest);
261 }
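// A sketch of what this emits (assuming indirect_jump expands to a PC-relative
// load of the literal bound below; the exact encoding may differ):
//   ldr Rtemp, [pc, #<literal offset>]
//   bx  Rtemp
//   .word destination
// which fits within the 16 bytes reported by trampoline_size().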
262
263 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
264 VMRegPair *regs,
265 VMRegPair *regs2,
266 int total_args_passed) {
267 assert(regs2 == NULL, "not needed on arm");
268
269 int slot = 0;
270 int ireg = 0;
271 #ifdef __ABI_HARD__
272 int fp_slot = 0;
273 int single_fpr_slot = 0;
274 #endif // __ABI_HARD__
275 for (int i = 0; i < total_args_passed; i++) {
276 switch (sig_bt[i]) {
277 case T_SHORT:
278 case T_CHAR:
279 case T_BYTE:
280 case T_BOOLEAN:
281 case T_INT:
282 case T_ARRAY:
283 case T_OBJECT:
284 case T_ADDRESS:
285 case T_METADATA:
286 #ifndef __ABI_HARD__
287 case T_FLOAT:
344 case T_DOUBLE:
345 assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
346 if (fp_slot <= 14) {
347 FloatRegister r1 = as_FloatRegister(fp_slot);
348 FloatRegister r2 = as_FloatRegister(fp_slot+1);
349 regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
350 fp_slot += 2;
351 } else {
352       if (slot & 1) slot++;
353 regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
354 slot += 2;
355 single_fpr_slot = 16;
356 }
357 break;
358 #endif // __ABI_HARD__
359 default:
360 ShouldNotReachHere();
361 }
362 }
363 return slot;
364 }
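// Worked example (illustrative; assumes the elided integer cases follow the
// AAPCS rules used above): for a native signature (jint, jdouble, jint) under
// __ABI_HARD__, the loop assigns R0 to the first int, S0:S1 (i.e. D0) to the
// double via fp_slot, and R1 to the second int; slot stays 0 because nothing
// spills to the stack.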
365
366 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
367 VMRegPair *regs,
368 int total_args_passed,
369 int is_outgoing) {
370 #ifdef __SOFTFP__
371 // soft float is the same as the C calling convention.
372 return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
373 #endif // __SOFTFP__
374 (void) is_outgoing;
375 int slot = 0;
376 int ireg = 0;
377 int freg = 0;
378 int single_fpr = 0;
379
380 for (int i = 0; i < total_args_passed; i++) {
381 switch (sig_bt[i]) {
382 case T_SHORT:
383 case T_CHAR:
384 case T_BYTE:
385 case T_BOOLEAN:
386 case T_INT:
387 case T_ARRAY:
388 case T_OBJECT:
389 case T_ADDRESS:
432 Register r2 = as_Register(ireg + 1);
433 regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
434 ireg += 2;
435 } else {
436 if (slot & 1) slot++;
437 regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
438 slot += 2;
439 ireg = 4;
440 }
441 break;
442 case T_VOID:
443 regs[i].set_bad();
444 break;
445 default:
446 ShouldNotReachHere();
447 }
448 }
449
450 if (slot & 1) slot++;
451 return slot;
452 }
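// The trailing "if (slot & 1) slot++" rounds the outgoing argument area up to
// an even number of 32-bit slots, keeping it 8-byte aligned; e.g. a signature
// spilling 3 slots reports 4.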
453
454 static void patch_callers_callsite(MacroAssembler *masm) {
455 Label skip;
456
457 __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
458 __ cbz(Rtemp, skip);
459
460 // Pushing an even number of registers for stack alignment.
461 // Selecting R9, which had to be saved anyway for some platforms.
462 __ push(RegisterSet(R0, R3) | R9 | LR);
463
464 __ mov(R0, Rmethod);
465 __ mov(R1, LR);
466 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
467
468 __ pop(RegisterSet(R0, R3) | R9 | LR);
469
470 __ bind(skip);
471 }
472
473 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
474 int total_args_passed, int comp_args_on_stack,
475 const BasicType *sig_bt, const VMRegPair *regs) {
476   // TODO: ARM - maybe we can use ldm to load arguments
477 const Register tmp = Rtemp; // avoid erasing R5_mh
478
479   // The next assert may not be needed, but it is safer. Extra analysis is
480   // required if there are not enough free registers and we need to use R5 here.
481 assert_different_registers(tmp, R5_mh);
482
483 // 6243940 We might end up in handle_wrong_method if
484 // the callee is deoptimized as we race thru here. If that
485 // happens we don't want to take a safepoint because the
486 // caller frame will look interpreted and arguments are now
487 // "compiled" so it is much better to make this transition
488 // invisible to the stack walking code. Unfortunately if
489 // we try and find the callee by normal means a safepoint
490 // is possible. So we stash the desired callee in the thread
491 // and the vm will find there should this case occur.
492 Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
493 __ str(Rmethod, callee_target_addr);
494
495
496 assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
497
498 const Register initial_sp = Rmethod; // temporarily scratched
499
500 // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
501 assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
502
503 __ mov(initial_sp, SP);
504
505 if (comp_args_on_stack) {
506 __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
507 }
508 __ bic(SP, SP, StackAlignmentInBytes - 1);
509
510 for (int i = 0; i < total_args_passed; i++) {
511 if (sig_bt[i] == T_VOID) {
512 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
513 continue;
514 }
536 __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
537 }
538 } else if (r_1->is_FloatRegister()) {
539 #ifdef __SOFTFP__
540 ShouldNotReachHere();
541 #endif // __SOFTFP__
542 if (!r_2->is_valid()) {
543 __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
544 } else {
545 __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
546 }
547 } else {
548 assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
549 }
550 }
551
552 // restore Rmethod (scratched for initial_sp)
553 __ ldr(Rmethod, callee_target_addr);
554 __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
555
556 }
557
558 static void gen_c2i_adapter(MacroAssembler *masm,
559 int total_args_passed, int comp_args_on_stack,
560 const BasicType *sig_bt, const VMRegPair *regs,
561 Label& skip_fixup) {
562   // TODO: ARM - maybe we can use stm to deoptimize arguments
563 const Register tmp = Rtemp;
564
565 patch_callers_callsite(masm);
566 __ bind(skip_fixup);
567
568 __ mov(Rsender_sp, SP); // not yet saved
569
570
571 int extraspace = total_args_passed * Interpreter::stackElementSize;
572 if (extraspace) {
573 __ sub_slow(SP, SP, extraspace);
574 }
575
576 for (int i = 0; i < total_args_passed; i++) {
577 if (sig_bt[i] == T_VOID) {
578 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
579 continue;
580 }
581 int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
582
583 VMReg r_1 = regs[i].first();
584 VMReg r_2 = regs[i].second();
585 if (r_1->is_stack()) {
586 int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
587 if (!r_2->is_valid()) {
588 __ ldr(tmp, Address(SP, arg_offset));
589 __ str(tmp, Address(SP, stack_offset));
599 } else {
600 __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
601 __ str(r_2->as_Register(), Address(SP, stack_offset));
602 }
603 } else if (r_1->is_FloatRegister()) {
604 #ifdef __SOFTFP__
605 ShouldNotReachHere();
606 #endif // __SOFTFP__
607 if (!r_2->is_valid()) {
608 __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
609 } else {
610 __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
611 }
612 } else {
613 assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
614 }
615 }
616
617 __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
618
619 }
620
621 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
622 int total_args_passed,
623 int comp_args_on_stack,
624 const BasicType *sig_bt,
625 const VMRegPair *regs,
626 AdapterFingerPrint* fingerprint) {
627 address i2c_entry = __ pc();
628 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
629
630 address c2i_unverified_entry = __ pc();
631 Label skip_fixup;
632 const Register receiver = R0;
633 const Register holder_klass = Rtemp; // XXX should be OK for C2 but not 100% sure
634 const Register receiver_klass = R4;
635
636 __ load_klass(receiver_klass, receiver);
637 __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
638 __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
639 __ cmp(receiver_klass, holder_klass);
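  // The eq/ne-predicated instructions below all hang off this comparison: on
  // a klass match, Method::code is loaded and skip_fixup is taken only if no
  // compiled code exists yet; any other combination (klass mismatch, or
  // compiled code already present) goes to the ic-miss stub.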
640
641 __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
642 __ cmp(Rtemp, 0, eq);
643 __ b(skip_fixup, eq);
644 __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
645
646 address c2i_entry = __ pc();
647 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
648
649 __ flush();
650 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
651 }
652
653
654 static int reg2offset_in(VMReg r) {
655 // Account for saved FP and LR
656 return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
657 }
658
659 static int reg2offset_out(VMReg r) {
660 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
661 }
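// Sketch of the arithmetic (from the definitions above): an incoming stack
// VMReg at slot 3 maps to FP + 3 * VMRegImpl::stack_slot_size + 2*wordSize,
// where 2*wordSize skips the FP and LR words pushed by the wrapper prologue;
// since out_preserve_stack_slots() is 0 on ARM (see below), an outgoing slot
// simply maps to SP + slot * VMRegImpl::stack_slot_size.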
662
663
664 static void verify_oop_args(MacroAssembler* masm,
825 int lock_slot_fp_offset = stack_size - 2 * wordSize -
826 lock_slot_offset * VMRegImpl::stack_slot_size;
827
828 // Unverified entry point
829 address start = __ pc();
830
831 // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
832 const Register receiver = R0; // see receiverOpr()
833 __ load_klass(Rtemp, receiver);
834 __ cmp(Rtemp, Ricklass);
835 Label verified;
836
837 __ b(verified, eq); // jump over alignment no-ops too
838 __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
839 __ align(CodeEntryAlignment);
840
841 // Verified entry point
842 __ bind(verified);
843 int vep_offset = __ pc() - start;
844
845
846 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
847 // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
848 // instead of doing a full VM transition once it's been computed.
849 Label slow_case;
850 const Register obj_reg = R0;
851
852     // Unlike Object.hashCode, System.identityHashCode is a static method
853     // and gets the object as an argument instead of the receiver.
854 if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
855 assert(method->is_static(), "method should be static");
856 // return 0 for null reference input, return val = R0 = obj_reg = 0
857 __ cmp(obj_reg, 0);
858 __ bx(LR, eq);
859 }
860
861 __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
862
863 assert(markOopDesc::unlocked_value == 1, "adjust this code");
864 __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
865
866 if (UseBiasedLocking) {
867 assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
868 __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
869 }
870
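    // Sketch, assuming the 32-bit mark word layout (hash:25 | age:4 |
    // biased_lock:1 | lock:2): bics keeps only the hash bits and sets the
    // condition flags, so 'ne' means a hash has been installed and can be
    // shifted down into R0 and returned directly.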
871 __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
872 __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
873 __ bx(LR, ne);
874
875 __ bind(slow_case);
876 }
877
878 // Bang stack pages
879 __ arm_stack_overflow_check(stack_size, Rtemp);
880
881 // Setup frame linkage
882 __ raw_push(FP, LR);
883 __ mov(FP, SP);
884 __ sub_slow(SP, SP, stack_size - 2*wordSize);
885
886 int frame_complete = __ pc() - start;
887
888 OopMapSet* oop_maps = new OopMapSet();
889 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
890 const int extra_args = is_static ? 2 : 1;
891 int receiver_offset = -1;
892 int fp_regs_in_arguments = 0;
893
894 for (i = total_in_args; --i >= 0; ) {
895 switch (in_sig_bt[i]) {
896 case T_ARRAY:
897 case T_OBJECT: {
898 VMReg src = in_regs[i].first();
899 VMReg dst = out_regs[i + extra_args].first();
900 if (src->is_stack()) {
901 assert(dst->is_stack(), "must be");
902 assert(i != 0, "Incoming receiver is always in a register");
903 __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
904 __ cmp(Rtemp, 0);
905 __ add(Rtemp, FP, reg2offset_in(src), ne);
906 __ str(Rtemp, Address(SP, reg2offset_out(dst)));
907 int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
908 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
909 } else {
910 int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
911 __ str(src->as_Register(), Address(SP, offset));
912 map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
913 if ((i == 0) && (!is_static)) {
914 receiver_offset = offset;
915 }
916 oop_handle_offset += VMRegImpl::slots_per_word;
917
918 if (dst->is_stack()) {
919 __ movs(Rtemp, src->as_Register());
920 __ add(Rtemp, SP, offset, ne);
921 __ str(Rtemp, Address(SP, reg2offset_out(dst)));
922 } else {
923 __ movs(dst->as_Register(), src->as_Register());
924 __ add(dst->as_Register(), SP, offset, ne);
925 }
926 }
927 }
928        break; // no fall-through into T_VOID
929 case T_VOID:
930 break;
931
932
933 #ifdef __SOFTFP__
934 case T_DOUBLE:
935 #endif
936 case T_LONG: {
937 VMReg src_1 = in_regs[i].first();
938 VMReg src_2 = in_regs[i].second();
939 VMReg dst_1 = out_regs[i + extra_args].first();
940 VMReg dst_2 = out_regs[i + extra_args].second();
941 #if (ALIGN_WIDE_ARGUMENTS == 0)
942 // C convention can mix a register and a stack slot for a
943       // 64-bit native argument.
944
945 // Note: following code should work independently of whether
946 // the Java calling convention follows C convention or whether
947 // it aligns 64-bit values.
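      // For instance (illustrative): with ALIGN_WIDE_ARGUMENTS == 0, a native
      // (jint, jlong) call passes the jint in R0 and the jlong in R1:R2 with
      // no padding register, and a jlong arriving when only R3 is free is
      // split between R3 and the first stack slot.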
948 if (dst_2->is_Register()) {
949 if (src_1->as_Register() != dst_1->as_Register()) {
950 assert(src_1->as_Register() != dst_2->as_Register() &&
951 src_2->as_Register() != dst_2->as_Register(), "must be");
1084 if (src_1->is_stack()) {
1085 if (dst_1->is_stack()) {
1086 assert(dst_2->is_stack(), "must be");
1087 __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1088 __ ldr(LR, Address(FP, reg2offset_in(src_2)));
1089 __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1090 __ str(LR, Address(SP, reg2offset_out(dst_2)));
1091 } else {
1092 // C2 Java calling convention does not populate S14 and S15, therefore
1093 // those need to be loaded from stack here
1094 __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1095 fp_regs_in_arguments += 2;
1096 }
1097 } else {
1098 assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1099 fp_regs_in_arguments += 2;
1100 }
1101 break;
1102 }
1103 #endif // __ABI_HARD__
1104
1105 default: {
1106 assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1107 VMReg src = in_regs[i].first();
1108 VMReg dst = out_regs[i + extra_args].first();
1109 if (src->is_stack()) {
1110 assert(dst->is_stack(), "must be");
1111 __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1112 __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1113 } else if (dst->is_stack()) {
1114 __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1115 } else {
1116 assert(src->is_Register() && dst->is_Register(), "must be");
1117 __ mov(dst->as_Register(), src->as_Register());
1118 }
1119 }
1120 }
1121 }
1122
1123 // Get Klass mirror
1124 int klass_offset = -1;
1125 if (is_static) {
1126 klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1127 __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1128 __ add(c_rarg1, SP, klass_offset);
1129 __ str(Rtemp, Address(SP, klass_offset));
1130 map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1131 }
1132
1133 // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1134 int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1135 assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1136 oop_maps->add_gc_map(pc_offset, map);
1137
1138 // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1139 __ membar(MacroAssembler::StoreStore, Rtemp);
1140
1141 // RedefineClasses() tracing support for obsolete method entry
1142 if (log_is_enabled(Trace, redefine, class, obsolete)) {
1143 __ save_caller_save_registers();
1144 __ mov(R0, Rthread);
1145 __ mov_metadata(R1, method());
1146 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1147 __ restore_caller_save_registers();
1148 }
1149
1150 const Register sync_handle = R5;
1151 const Register sync_obj = R6;
1152 const Register disp_hdr = altFP_7_11;
1153 const Register tmp = R8;
1154
1155 Label slow_lock, slow_lock_biased, lock_done, fast_lock;
1156 if (method->is_synchronized()) {
1157 // The first argument is a handle to sync object (a class or an instance)
1158 __ ldr(sync_obj, Address(R1));
1159 // Remember the handle for the unlocking code
1160 __ mov(sync_handle, R1);
1161
1162     if (UseBiasedLocking) {
1163 __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1164 }
1165
1166 const Register mark = tmp;
1167 // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1168 // That would be acceptable as either CAS or slow case path is taken in that case
1169
1170 __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1171 __ sub(disp_hdr, FP, lock_slot_fp_offset);
1172 __ tst(mark, markOopDesc::unlocked_value);
1173 __ b(fast_lock, ne);
1174
1175 // Check for recursive lock
1176 // See comments in InterpreterMacroAssembler::lock_object for
1177 // explanations on the fast recursive locking check.
1178 // Check independently the low bits and the distance to SP
1179 // -1- test low 2 bits
1180 __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1181 // -2- test (hdr - SP) if the low two bits are 0
1182 __ sub(Rtemp, mark, SP, eq);
1183 __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1184 // If still 'eq' then recursive locking OK: set displaced header to 0
1185 __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1186 __ b(lock_done, eq);
1187 __ b(slow_lock);
1188
1189 __ bind(fast_lock);
1190 __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1191
1192 __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1193
1194 __ bind(lock_done);
1195 }
1196
1197 // Get JNIEnv*
1198 __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1199
1200 // Perform thread state transition
1201 __ mov(Rtemp, _thread_in_native);
1202 __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1203
1204 // Finally, call the native method
1205 __ call(method->native_function());
1206
1207 // Set FPSCR/FPCR to a known state
1208 if (AlwaysRestoreFPU) {
1209 __ restore_default_fp_mode();
1210 }
1211
1212 // Do a safepoint check while thread is in transition state
1213 InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1214 Label call_safepoint_runtime, return_to_java;
1215 __ mov(Rtemp, _thread_in_native_trans);
1216 __ ldr_literal(R2, safepoint_state);
1217 __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1218
1219 // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1220 __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1221
1222 __ ldr_s32(R2, Address(R2));
1242
1243     if (UseBiasedLocking) {
1244 __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1245 // disp_hdr may not have been saved on entry with biased locking
1246 __ sub(disp_hdr, FP, lock_slot_fp_offset);
1247 }
1248
1249 // See C1_MacroAssembler::unlock_object() for more comments
1250 __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1251 __ cbz(R2, unlock_done);
1252
1253 __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1254
1255 __ bind(unlock_done);
1256 }
1257
1258   // Reset the last Java frame and zero the JNI handle block top
1259 __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1260 __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1261
1262 __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1263 if (CheckJNICalls) {
1264 __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1265 }
1266
1267 // Unbox oop result, e.g. JNIHandles::resolve value in R0.
1268 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1269 __ resolve_jobject(R0, // value
1270 Rtemp, // tmp1
1271 R1_tmp); // tmp2
1272 }
1273
1274 // Any exception pending?
1275 __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1276 __ mov(SP, FP);
1277
1278 __ cmp(Rtemp, 0);
1279 // Pop the frame and return if no exception pending
1280 __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1281 // Pop the frame and forward the exception. Rexception_pc contains return address.
1282 __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1283 __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1284 __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1285
1286 // Safepoint operation and/or pending suspend request is in progress.
1287 // Save the return values and call the runtime function by hand.
1288 __ bind(call_safepoint_runtime);
1289 push_result_registers(masm, ret_type);
1290 __ mov(R0, Rthread);
1291 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1292 pop_result_registers(masm, ret_type);
1293 __ b(return_to_java);
1294
1295 __ bind_literal(safepoint_state);
1296
1297 // Reguard stack pages. Save native results around a call to C runtime.
1298 __ bind(reguard);
1299 push_result_registers(masm, ret_type);
1300 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1301 pop_result_registers(masm, ret_type);
1302 __ b(reguard_done);
1303
1343
1344 __ b(unlock_done);
1345 }
1346
1347 __ flush();
1348 return nmethod::new_native_nmethod(method,
1349 compile_id,
1350 masm->code(),
1351 vep_offset,
1352 frame_complete,
1353 stack_slots / VMRegImpl::slots_per_word,
1354 in_ByteSize(is_static ? klass_offset : receiver_offset),
1355 in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1356 oop_maps);
1357 }
1358
1359 // This function returns the adjustment size (in number of words) to a c2i
1360 // adapter activation, for use during deoptimization.
1361 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1362 int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1363 return extra_locals_size;
1364 }
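// E.g. a callee with 2 parameters and 5 locals yields
// (5 - 2) * Interpreter::stackElementWords extra words in the activation.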
1365
1366
1367 uint SharedRuntime::out_preserve_stack_slots() {
1368 return 0;
1369 }
1370
1371
1372 //------------------------------generate_deopt_blob----------------------------
1373 void SharedRuntime::generate_deopt_blob() {
1374 ResourceMark rm;
1375 CodeBuffer buffer("deopt_blob", 1024, 1024);
1376 int frame_size_in_words;
1377 OopMapSet* oop_maps;
1378 int reexecute_offset;
1379 int exception_in_tls_offset;
1380 int exception_offset;
1381
1382 MacroAssembler* masm = new MacroAssembler(&buffer);
1383 Label cont;
1384 const Register Rkind = R9; // caller-saved
1385 const Register Rublock = R6;
1386 const Register Rsender = altFP_7_11;
1387 assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1388
1389 address start = __ pc();
1390
1391 oop_maps = new OopMapSet();
1392 // LR saved by caller (can be live in c2 method)
1393
1394 // A deopt is a case where LR may be live in the c2 nmethod. So it's
1395 // not possible to call the deopt blob from the nmethod and pass the
1396 // address of the deopt handler of the nmethod in LR. What happens
1397 // now is that the caller of the deopt blob pushes the current
1398 // address so the deopt blob doesn't have to do it. This way LR can
1399 // be preserved, contains the live value from the nmethod and is
1400 // saved at R14/R30_offset here.
1401 OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1402 __ mov(Rkind, Deoptimization::Unpack_deopt);
1403 __ b(cont);
1404
1405 exception_offset = __ pc() - start;
1406
1456 // assert that exception_pc is zero in tls
1457 { Label L;
1458 __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1459 __ cbz(Rexception_pc, L);
1460 __ stop("exception pc should be null");
1461 __ bind(L);
1462 }
1463 #endif
1464 __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1465 __ verify_oop(Rexception_obj);
1466 {
1467 const Register Rzero = __ zero_register(Rtemp);
1468 __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1469 }
1470
1471 __ bind(noException);
1472
1473 // This frame is going away. Fetch return value, so we can move it to
1474 // a new frame.
1475 __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1476 __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1477 #ifndef __SOFTFP__
1478 __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1479 #endif
1480 // pop frame
1481 __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1482
1483 // Set initial stack state before pushing interpreter frames
1484 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1485 __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1486 __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1487
1488 __ add(SP, SP, Rtemp);
1489
1490 #ifdef ASSERT
1491   // Compilers generate code that bangs the stack by as much as the
1492   // interpreter would need. So this stack banging should never
1493   // trigger a fault. Verify that it does not on non-product builds.
1494   // See if there is enough stack to push the deoptimized frames
1495 if (UseStackBanging) {
1496 // The compiled method that we are deoptimizing was popped from the stack.
1497 // If the stack bang results in a stack overflow, we don't return to the
1498 // method that is being deoptimized. The stack overflow exception is
1499 // propagated to the caller of the deoptimized method. Need to get the pc
1500 // from the caller in LR and restore FP.
1501 __ ldr(LR, Address(R2, 0));
1502 __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1503 __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1504 __ arm_stack_overflow_check(R8, Rtemp);
1505 }
1506 #endif
1507 __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1508
1509 // Pick up the initial fp we should save
1510 // XXX Note: was ldr(FP, Address(FP));
1511
1512 // The compiler no longer uses FP as a frame pointer for the
1513 // compiled code. It can be used by the allocator in C2 or to
1514 // memorize the original SP for JSR292 call sites.
1515
1516 // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
1517 // Deoptimization::fetch_unroll_info computes the right FP value and
1518 // stores it in Rublock.initial_info. This has been activated for ARM.
1519 __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1520
1521 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1522 __ mov(Rsender, SP);
1523 __ sub(SP, SP, Rtemp);
1524
1525 // Push interpreter frames in a loop
1526 Label loop;
1527 __ bind(loop);
1528 __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc
1529 __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size
1530
1531 __ raw_push(FP, LR); // create new frame
1532 __ mov(FP, SP);
1533 __ sub(Rtemp, Rtemp, 2*wordSize);
1534
1535 __ sub(SP, SP, Rtemp);
1536
1537 __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1538 __ mov(LR, 0);
1539 __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1540
1541 __ subs(R8, R8, 1); // decrement counter
1542 __ mov(Rsender, SP);
1543 __ b(loop, ne);
1544
1545 // Re-push self-frame
1546 __ ldr(LR, Address(R2));
1547 __ raw_push(FP, LR);
1548 __ mov(FP, SP);
1549 __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
1550
1551 // Restore frame locals after moving the frame
1552 __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1553 __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1554
1555 #ifndef __SOFTFP__
1556 __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1557 #endif // !__SOFTFP__
1558
1559 #ifdef ASSERT
1560 // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
1561 { Label L;
1562 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1563 __ cmp_32(Rkind, Rtemp);
1564 __ b(L, eq);
1565 __ stop("Rkind was overwritten");
1566 __ bind(L);
1567 }
1568 #endif
1569
1570 // Call unpack_frames with proper arguments
1571 __ mov(R0, Rthread);
1572 __ mov(R1, Rkind);
1573
1574 pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
1575 assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1576 __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1577 if (pc_offset == -1) {
1578 pc_offset = __ offset();
1579 }
1580 oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
1581 __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1582
1583 // Collect return values, pop self-frame and jump to interpreter
1584 __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1585 __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1586   // Interpreter floats are controlled by __SOFTFP__, but the compiler's
1587   // float return value registers are controlled by __ABI_HARD__.
1588   // This matters for vfp-sflt builds (softfp ABI with VFP hardware).
1589 #ifndef __SOFTFP__
1590 // Interpreter hard float
1591 #ifdef __ABI_HARD__
1592 // Compiler float return value in FP registers
1593 __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1594 #else
1595   // Compiler float return value in integer registers,
1596   // copy to D0 for the interpreter (D0 <-- R1:R0)
1597 __ fmdrr(D0_tos, R0, R1);
1598 #endif
1599 #endif // !__SOFTFP__
1600 __ mov(SP, FP);
1601
1602 __ pop(RegisterSet(FP) | RegisterSet(PC));
1603
1604 __ flush();
1605
1606 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
1607 reexecute_offset, frame_size_in_words);
1608 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
1609 }
1610
1611 #ifdef COMPILER2
1612
1613 //------------------------------generate_uncommon_trap_blob--------------------
1614 // Ought to generate an ideal graph & compile, but here's some hand-written
1615 // ARM assembly instead.
1616 void SharedRuntime::generate_uncommon_trap_blob() {
1617 // allocate space for the code
1618 ResourceMark rm;
1619
1620 // setup code generation tools
1621 int pad = VerifyThread ? 512 : 0;
1622 #ifdef _LP64
1623 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
1624 #else
1625 // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
1626 // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
1627 CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
1628 #endif
1629 // bypassed when code generation is useless
1630 MacroAssembler* masm = new MacroAssembler(&buffer);
1631 const Register Rublock = R6;
1632 const Register Rsender = altFP_7_11;
1633 assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
1634
1635 //
1636 // This is the entry point for all traps the compiler takes when it thinks
1637 // it cannot handle further execution of compiled code. The frame is
1638 // deoptimized in these cases and converted into interpreter frames for
1639 // execution.
1640 // The steps taken by this frame are as follows:
1641 // - push a fake "unpack_frame"
1642 // - call the C routine Deoptimization::uncommon_trap (this function
1643 // packs the current compiled frame into vframe arrays and returns
1644 // information about the number and size of interpreter frames which
1645 // are equivalent to the frame which is being deoptimized)
1646 // - deallocate the "unpack_frame"
1647 // - deallocate the deoptimization frame
1648 // - in a loop using the information returned in the previous step
1649 // push interpreter frames;
1650 // - create a dummy "unpack_frame"
1651 // - call the C routine: Deoptimization::unpack_frames (this function
1652 // lays out values on the interpreter frame which was just created)
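// The code below consumes these UnrollBlock fields (accessors as used in the
// loads that follow): number_of_frames (the loop counter, in R8), frame_sizes
// and frame_pcs (arrays walked through R3 and R2), caller_adjustment (the
// initial SP adjustment) and initial_info (the caller's FP).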
1668 __ mov(Rublock, R0);
1669 __ reset_last_Java_frame(Rtemp);
1670 __ raw_pop(FP, LR);
1671
1672 #ifdef ASSERT
1673 { Label L;
1674 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1675 __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
1676 __ b(L, eq);
1677 __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
1678 __ bind(L);
1679 }
1680 #endif
1681
1682
1683 // Set initial stack state before pushing interpreter frames
1684 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1685 __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1686 __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1687
1688 __ add(SP, SP, Rtemp);
1689
1690 // See if there is enough stack to push the deoptimized frames
1691 #ifdef ASSERT
1692 // Compilers generate code that bangs the stack by as much as the
1693 // interpreter would need. So this stack banging should never
1694 // trigger a fault. Verify that it does not on non-product builds.
1695 if (UseStackBanging) {
1696 // The compiled method that we are deoptimizing was popped from the stack.
1697 // If the stack bang results in a stack overflow, we don't return to the
1698 // method that is being deoptimized. The stack overflow exception is
1699 // propagated to the caller of the deoptimized method. Need to get the pc
1700 // from the caller in LR and restore FP.
1701 __ ldr(LR, Address(R2, 0));
1702 __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1703 __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1704 __ arm_stack_overflow_check(R8, Rtemp);
1705 }
1706 #endif
1707 __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1708 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1709 __ mov(Rsender, SP);
1710 __ sub(SP, SP, Rtemp);
1711 // __ ldr(FP, Address(FP));
1712 __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1713
1714 // Push interpreter frames in a loop
1715 Label loop;
1716 __ bind(loop);
1717 __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc
1718 __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size
1719
1720 __ raw_push(FP, LR); // create new frame
1721 __ mov(FP, SP);
1722 __ sub(Rtemp, Rtemp, 2*wordSize);
1723
1724 __ sub(SP, SP, Rtemp);
1725
1726 __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1727 __ mov(LR, 0);
1728 __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1729 __ subs(R8, R8, 1); // decrement counter
1730 __ mov(Rsender, SP);
1731 __ b(loop, ne);
1732
1733 // Re-push self-frame
1734 __ ldr(LR, Address(R2));
1735 __ raw_push(FP, LR);
1736 __ mov(FP, SP);
1737
1738 // Call unpack_frames with proper arguments
1739 __ mov(R0, Rthread);
1740 __ mov(R1, Deoptimization::Unpack_uncommon_trap);
1741 __ set_last_Java_frame(SP, FP, false, Rtemp);
1742 __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1743 // oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
1744 __ reset_last_Java_frame(Rtemp);
1745
1746 __ mov(SP, FP);
1747 __ pop(RegisterSet(FP) | RegisterSet(PC));
1748
1749 masm->flush();
1750 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
1751 }
1752
1753 #endif // COMPILER2
1754
1755 //------------------------------generate_handler_blob------
1756 //
1757 // Generate a special Compile2Runtime blob that saves all registers,
1758 // sets up the oopmap, and calls the safepoint code to stop compiled
1759 // code at a safepoint.
1760 //
1761 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
1762 assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1763
1764 ResourceMark rm;
1765 CodeBuffer buffer("handler_blob", 256, 256);
1766 int frame_size_words;
1767 OopMapSet* oop_maps;
1768
1769 bool cause_return = (poll_type == POLL_AT_RETURN);
1770
1771 MacroAssembler* masm = new MacroAssembler(&buffer);
1772 address start = __ pc();
1773 oop_maps = new OopMapSet();
1774
1775 if (!cause_return) {
1776 __ sub(SP, SP, 4); // make room for LR which may still be live
1777 // here if we are coming from a c2 method
1778 }
1779
1780 OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
1781 if (!cause_return) {
1782 // update saved PC with correct value
1783 // need 2 steps because LR can be live in c2 method
1784 __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
1785 __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
1786 }
1787
1788 __ mov(R0, Rthread);
1789 int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1790 assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1791 __ call(call_ptr);
1792 if (pc_offset == -1) {
1793 pc_offset = __ offset();
1794 }
1795 oop_maps->add_gc_map(pc_offset, map);
1796 __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1797
1798 // Check for pending exception
1799 __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1800 __ cmp(Rtemp, 0);
1801
1802 if (!cause_return) {
1803 RegisterSaver::restore_live_registers(masm, false);
1804 __ pop(PC, eq);
1805 __ pop(Rexception_pc);
1806 } else {
1807 RegisterSaver::restore_live_registers(masm);
1808 __ bx(LR, eq);
1809 __ mov(Rexception_pc, LR);
1810 }
1811
1812 __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1813
1814 __ flush();
1815
1816 return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
1817 }
1818
1819 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
1820 assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1821
1822 ResourceMark rm;
1823 CodeBuffer buffer(name, 1000, 512);
1824 int frame_size_words;
1825 OopMapSet *oop_maps;
1826 int frame_complete;
1827
1828 MacroAssembler* masm = new MacroAssembler(&buffer);
1829 Label pending_exception;
1830