2284 // printf("start = %lx\n", start);
2285 int byteCount = aarch64_stub_prolog_size();
2286 // printf("byteCount = %x\n", byteCount);
2287 int instructionCount = (byteCount + 3)/ 4;
2288 // printf("instructionCount = %x\n", instructionCount);
2289 for (int i = 0; i < instructionCount; i++) {
2290 nop();
2291 }
2292
2293 memcpy(start, (void*)aarch64_stub_prolog, byteCount);
2294
2295 // write the address of the setup routine and the call format at the
2296 // end of the copied code
2297 u_int64_t *patch_end = (u_int64_t *)(start + byteCount);
2298 if (prolog_ptr)
2299 patch_end[-2] = (u_int64_t)prolog_ptr;
2300 patch_end[-1] = calltype;
2301 }
2302 #endif
2303
// Emit code that saves CPU state on the stack: first the integer registers
// selected by the mask 0x3fffffff (bits 0..29), then all 32 floating-point
// registers as 16 consecutive pairs.
//
// save_vectors: when false each pair is stored with stpd and sp is moved by
//               2 * wordSize per pair; when true each pair is stored with
//               stpq and sp is moved by 4 * wordSize per pair.
2304 void MacroAssembler::push_CPU_state(bool save_vectors) {
2305 push(0x3fffffff, sp); // integer registers except lr & sp
2306
2307 if (!save_vectors) {
     // Pairs are stored from registers (30, 31) down to (0, 1), each with
     // pre(sp, ...) addressing.
2308 for (int i = 30; i >= 0; i -= 2)
2309 stpd(as_FloatRegister(i), as_FloatRegister(i+1),
2310 Address(pre(sp, -2 * wordSize)));
2311 } else {
     // Same walk as above, but with stpq and twice the per-pair stack step.
2312 for (int i = 30; i >= 0; i -= 2)
2313 stpq(as_FloatRegister(i), as_FloatRegister(i+1),
2314 Address(pre(sp, -4 * wordSize)));
2315 }
2316 }
2317
2318 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
2319 if (!restore_vectors) {
2320 for (int i = 0; i < 32; i += 2)
2321 ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
2322 Address(post(sp, 2 * wordSize)));
2323 } else {
// Two-argument form of the store check. The dst argument is not used;
// the call is forwarded unchanged to store_check(Register obj).
3082 void MacroAssembler::store_check(Register obj, Address dst) {
3083 store_check(obj);
3084 }
3085
// Emit the card-marking store check for the oop in obj: obj is shifted right
// by card_shift and the byte addressed by Address(obj, rscratch1), where
// rscratch1 holds the card table base, is set to zero (the dirty value, as
// asserted below).
//
// obj: register holding the oop; it is overwritten by the shift.
// Also writes rscratch1 and, when UseCondCardMark is set, rscratch2.
3086 void MacroAssembler::store_check(Register obj) {
3087 // Does a store check for the oop in register obj. The content of
3088 // register obj is destroyed afterwards.
3089
3090 BarrierSet* bs = Universe::heap()->barrier_set();
3091 assert(bs->kind() == BarrierSet::CardTableForRS ||
3092 bs->kind() == BarrierSet::CardTableExtension,
3093 "Wrong barrier set kind");
3094
3095 CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
     // The byte-sized ldrb/strb accesses below rely on one-byte card entries.
3096 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
3097
     // obj := obj >> card_shift; the original oop value is lost here.
3098 lsr(obj, obj, CardTableModRefBS::card_shift);
3099
     // Cards are dirtied by storing zr, which is only correct if dirty == 0.
3100 assert(CardTableModRefBS::dirty_card_val() == 0, "must be");
3101
3102 {
     // adrp reports the low 12 bits of byte_map_base separately in offset.
     // They are not added back in below, so they are required to be zero.
3103 ExternalAddress cardtable((address) ct->byte_map_base);
3104 unsigned long offset;
3105 adrp(rscratch1, cardtable, offset);
3106 assert(offset == 0, "byte_map_base is misaligned");
3107 }
3108
3109 if (UseCondCardMark) {
     // Conditional card mark: after a StoreLoad barrier, read the card and
     // skip the store when it already holds zero.
3110 Label L_already_dirty;
3111 membar(StoreLoad);
3112 ldrb(rscratch2, Address(obj, rscratch1));
3113 cbz(rscratch2, L_already_dirty);
3114 strb(zr, Address(obj, rscratch1));
3115 bind(L_already_dirty);
3116 } else {
     // Unconditional card mark; a StoreStore barrier is emitted first only
     // when both UseConcMarkSweepGC and CMSPrecleaningEnabled are set.
3117 if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
3118 membar(StoreStore);
3119 }
3120 strb(zr, Address(obj, rscratch1));
3121 }
3122 }
3123
3124 void MacroAssembler::load_klass(Register dst, Register src) {
3125 if (UseCompressedClassPointers) {
3126 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3127 decode_klass_not_null(dst);
3579 Label runtime;
3580
3581 // Does store cross heap regions?
3582
3583 eor(tmp, store_addr, new_val);
3584 lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
3585 cbz(tmp, done);
3586
3587 // crosses regions, storing NULL?
3588
3589 cbz(new_val, done);
3590
3591 // storing region crossing non-NULL, is card already dirty?
3592
3593 ExternalAddress cardtable((address) ct->byte_map_base);
3594 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
3595 const Register card_addr = tmp;
3596
3597 lsr(card_addr, store_addr, CardTableModRefBS::card_shift);
3598
3599 unsigned long offset;
3600 adrp(tmp2, cardtable, offset);
3601
3602 // get the address of the card
3603 add(card_addr, card_addr, tmp2);
3604 ldrb(tmp2, Address(card_addr, offset));
3605 cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
3606 br(Assembler::EQ, done);
3607
3608 assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
3609
3610 membar(Assembler::StoreLoad);
3611
3612 ldrb(tmp2, Address(card_addr, offset));
3613 cbzw(tmp2, done);
3614
3615 // storing a region crossing, non-NULL oop, card is clean.
3616 // dirty card and log.
3617
3618 strb(zr, Address(card_addr, offset));
3619
3620 ldr(rscratch1, queue_index);
3621 cbz(rscratch1, runtime);
3622 sub(rscratch1, rscratch1, wordSize);
3623 str(rscratch1, queue_index);
3624
3625 ldr(tmp2, buffer);
3626 str(card_addr, Address(tmp2, rscratch1));
3627 b(done);
3628
3629 bind(runtime);
3630 // save the live input values
3631 push(store_addr->bit(true) | new_val->bit(true), sp);
3632 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
3633 pop(store_addr->bit(true) | new_val->bit(true), sp);
3634
3635 bind(done);
3636 }
3637
3638 #endif // INCLUDE_ALL_GCS
3954 code_section()->relocate(inst_mark(), rtype);
3955 ldrw(zr, Address(r, off));
3956 return inst_mark();
3957 }
3958
// Emit a 32-bit load (ldrw) from the address held in r, with the result
// discarded into zr, and attach a relocation of type rtype at inst_mark().
//
// r:     register holding the address to read (the polling page, going by
//        the function name).
// rtype: relocation type recorded for the load.
// Returns inst_mark(), the same position the relocation was attached to.
3959 address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
3960 InstructionMark im(this);
3961 code_section()->relocate(inst_mark(), rtype);
3962 ldrw(zr, Address(r, 0));
3963 return inst_mark();
3964 }
3965
// Load the 4K page address of dest into reg1 and return the remaining low
// 12 bits of the target through byte_offset.
//
// reg1:        destination register.
// dest:        address whose target() and rspec() are used.
// byte_offset: out-parameter, set to target & 0xfff. It is not added to reg1
//              here, so the caller has to apply it.
//
// A plain _adrp is emitted only when the target page lies within (1<<20)
// pages of both ends of the code cache. Otherwise an _adrp/movk pair is
// emitted.
3966 void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
     // NOTE(review): rtype is never read in this function. Confirm the
     // accessor chain has no side effects before removing it.
3967 relocInfo::relocType rtype = dest.rspec().reloc()->type();
     // Page numbers (address >> 12) of the code cache bounds and the target.
3968 unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12;
3969 unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12;
3970 unsigned long dest_page = (unsigned long)dest.target() >> 12;
     // Signed page distances from the lowest and highest code cache pages.
3971 long offset_low = dest_page - low_page;
3972 long offset_high = dest_page - high_page;
3973
3974 InstructionMark im(this);
3975 code_section()->relocate(inst_mark(), dest.rspec());
3976 // 8143067: Ensure that the adrp can reach the dest from anywhere within
3977 // the code cache so that if it is relocated we know it will still reach
3978 if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
3979 _adrp(reg1, dest.target());
3980 } else {
     // Out of range: keep only the low 20 bits of the page delta from the
     // current pc, so that bits 12..31 of pc()+offset match the target. The
     // movk then supplies target bits 32..47 with shift 32.
3981 unsigned long pc_page = (unsigned long)pc() >> 12;
3982 long offset = dest_page - pc_page;
3983 offset = (offset & ((1<<20)-1)) << 12;
3984 _adrp(reg1, pc()+offset);
3985 movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32);
3986 }
     // Low 12 bits of the target, which a page address cannot represent.
3987 byte_offset = (unsigned long)dest.target() & 0xfff;
3988 }
3989
// Emit a frame-building sequence: sp is lowered by framesize in total and
// rfp/lr are stored as a pair in the two words just below the incoming sp.
// When PreserveFramePointer is set, rfp is pointed at that saved pair.
//
// framesize: total frame size; must be > 0 (asserted).
//
// One of three sequences is chosen by size. The thresholds presumably follow
// the stp offset and sub immediate encoding limits -- TODO confirm.
3990 void MacroAssembler::build_frame(int framesize) {
3991 assert(framesize > 0, "framesize must be > 0");
3992 if (framesize < ((1 << 9) + 2 * wordSize)) {
     // Small frame: drop sp once, then store rfp/lr at the top of the frame.
3993 sub(sp, sp, framesize);
3994 stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
3995 if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
3996 } else {
     // Larger frame: store rfp/lr first with pre(sp, ...) addressing, then
     // lower sp by the remaining framesize - 2 * wordSize.
3997 stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
3998 if (PreserveFramePointer) mov(rfp, sp);
3999 if (framesize < ((1 << 12) + 2 * wordSize))
4000 sub(sp, sp, framesize - 2 * wordSize);
4001 else {
     // The remainder is materialised in rscratch1 first; rscratch1 is
     // clobbered on this path.
4002 mov(rscratch1, framesize - 2 * wordSize);
4003 sub(sp, sp, rscratch1);
4004 }
4005 }
4006 }
4007
|
2284 // printf("start = %lx\n", start);
2285 int byteCount = aarch64_stub_prolog_size();
2286 // printf("byteCount = %x\n", byteCount);
2287 int instructionCount = (byteCount + 3)/ 4;
2288 // printf("instructionCount = %x\n", instructionCount);
2289 for (int i = 0; i < instructionCount; i++) {
2290 nop();
2291 }
2292
2293 memcpy(start, (void*)aarch64_stub_prolog, byteCount);
2294
2295 // write the address of the setup routine and the call format at the
2296 // end of the copied code
2297 u_int64_t *patch_end = (u_int64_t *)(start + byteCount);
2298 if (prolog_ptr)
2299 patch_end[-2] = (u_int64_t)prolog_ptr;
2300 patch_end[-1] = calltype;
2301 }
2302 #endif
2303
// Emit code that saves a subset of registers on the stack: the integer
// registers r0..r18 except rscratch1 and rscratch2, followed by the
// floating-point registers v0-v7 and v16-v31 as pairs. v8-v15 are skipped.
// pop_call_clobbered_registers() restores the same set in the opposite
// order.
2304 void MacroAssembler::push_call_clobbered_registers() {
2305 push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2306
2307 // Push v0-v7, v16-v31.
     // i is always even and names the first register of a pair (i, i+1). The
     // pairs are stored in descending order with stpd and pre(sp, ...)
     // addressing.
2308 for (int i = 30; i >= 0; i -= 2) {
2309 if (i <= v7->encoding() || i >= v16->encoding()) {
2310 stpd(as_FloatRegister(i), as_FloatRegister(i+1),
2311 Address(pre(sp, -2 * wordSize)));
2312 }
2313 }
2314 }
2315
// Emit code that restores the registers saved by
// push_call_clobbered_registers(): first the floating-point pairs v0-v7 and
// v16-v31 in ascending order, then the integer registers r0..r18 except
// rscratch1 and rscratch2.
2316 void MacroAssembler::pop_call_clobbered_registers() {
2317
     // Same register filter as the push side, walked in ascending order with
     // ldpd and post(sp, ...) addressing.
2318 for (int i = 0; i < 32; i += 2) {
2319 if (i <= v7->encoding() || i >= v16->encoding()) {
2320 ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
2321 Address(post(sp, 2 * wordSize)));
2322 }
2323 }
2324
2325 pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2326 }
2327
// Emit code that saves CPU state on the stack: first the integer registers
// selected by the mask 0x3fffffff (bits 0..29), then all 32 floating-point
// registers as 16 consecutive pairs.
//
// save_vectors: when false each pair is stored with stpd and sp is moved by
//               2 * wordSize per pair; when true each pair is stored with
//               stpq and sp is moved by 4 * wordSize per pair.
2328 void MacroAssembler::push_CPU_state(bool save_vectors) {
2329 push(0x3fffffff, sp); // integer registers except lr & sp
2330
2331 if (!save_vectors) {
     // Pairs are stored from registers (30, 31) down to (0, 1), each with
     // pre(sp, ...) addressing.
2332 for (int i = 30; i >= 0; i -= 2)
2333 stpd(as_FloatRegister(i), as_FloatRegister(i+1),
2334 Address(pre(sp, -2 * wordSize)));
2335 } else {
     // Same walk as above, but with stpq and twice the per-pair stack step.
2336 for (int i = 30; i >= 0; i -= 2)
2337 stpq(as_FloatRegister(i), as_FloatRegister(i+1),
2338 Address(pre(sp, -4 * wordSize)));
2339 }
2340 }
2341
2342 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
2343 if (!restore_vectors) {
2344 for (int i = 0; i < 32; i += 2)
2345 ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
2346 Address(post(sp, 2 * wordSize)));
2347 } else {
// Two-argument form of the store check. The dst argument is not used;
// the call is forwarded unchanged to store_check(Register obj).
3106 void MacroAssembler::store_check(Register obj, Address dst) {
3107 store_check(obj);
3108 }
3109
// Emit the card-marking store check for the oop in obj: obj is shifted right
// by card_shift and the byte addressed by Address(obj, rscratch1), where
// rscratch1 is filled by load_byte_map_base(), is set to zero (the dirty
// value, as asserted below).
//
// obj: register holding the oop; it is overwritten by the shift.
// Also writes rscratch1 and, when UseCondCardMark is set, rscratch2.
3110 void MacroAssembler::store_check(Register obj) {
3111 // Does a store check for the oop in register obj. The content of
3112 // register obj is destroyed afterwards.
3113
3114 BarrierSet* bs = Universe::heap()->barrier_set();
3115 assert(bs->kind() == BarrierSet::CardTableForRS ||
3116 bs->kind() == BarrierSet::CardTableExtension,
3117 "Wrong barrier set kind");
3118
3119 CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
     // The byte-sized ldrb/strb accesses below rely on one-byte card entries.
3120 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
3121
     // obj := obj >> card_shift; the original oop value is lost here.
3122 lsr(obj, obj, CardTableModRefBS::card_shift);
3123
     // Cards are dirtied by storing zr, which is only correct if dirty == 0.
3124 assert(CardTableModRefBS::dirty_card_val() == 0, "must be");
3125
     // rscratch1 := card table base.
3126 load_byte_map_base(rscratch1);
3127
3128 if (UseCondCardMark) {
     // Conditional card mark: after a StoreLoad barrier, read the card and
     // skip the store when it already holds zero.
3129 Label L_already_dirty;
3130 membar(StoreLoad);
3131 ldrb(rscratch2, Address(obj, rscratch1));
3132 cbz(rscratch2, L_already_dirty);
3133 strb(zr, Address(obj, rscratch1));
3134 bind(L_already_dirty);
3135 } else {
     // Unconditional card mark; a StoreStore barrier is emitted first only
     // when both UseConcMarkSweepGC and CMSPrecleaningEnabled are set.
3136 if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
3137 membar(StoreStore);
3138 }
3139 strb(zr, Address(obj, rscratch1));
3140 }
3141 }
3142
3143 void MacroAssembler::load_klass(Register dst, Register src) {
3144 if (UseCompressedClassPointers) {
3145 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3146 decode_klass_not_null(dst);
3598 Label runtime;
3599
3600 // Does store cross heap regions?
3601
3602 eor(tmp, store_addr, new_val);
3603 lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
3604 cbz(tmp, done);
3605
3606 // crosses regions, storing NULL?
3607
3608 cbz(new_val, done);
3609
3610 // storing region crossing non-NULL, is card already dirty?
3611
3612 ExternalAddress cardtable((address) ct->byte_map_base);
3613 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
3614 const Register card_addr = tmp;
3615
3616 lsr(card_addr, store_addr, CardTableModRefBS::card_shift);
3617
3618 // get the address of the card
3619 load_byte_map_base(tmp2);
3620 add(card_addr, card_addr, tmp2);
3621 ldrb(tmp2, Address(card_addr));
3622 cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
3623 br(Assembler::EQ, done);
3624
3625 assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
3626
3627 membar(Assembler::StoreLoad);
3628
3629 ldrb(tmp2, Address(card_addr));
3630 cbzw(tmp2, done);
3631
3632 // storing a region crossing, non-NULL oop, card is clean.
3633 // dirty card and log.
3634
3635 strb(zr, Address(card_addr));
3636
3637 ldr(rscratch1, queue_index);
3638 cbz(rscratch1, runtime);
3639 sub(rscratch1, rscratch1, wordSize);
3640 str(rscratch1, queue_index);
3641
3642 ldr(tmp2, buffer);
3643 str(card_addr, Address(tmp2, rscratch1));
3644 b(done);
3645
3646 bind(runtime);
3647 // save the live input values
3648 push(store_addr->bit(true) | new_val->bit(true), sp);
3649 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
3650 pop(store_addr->bit(true) | new_val->bit(true), sp);
3651
3652 bind(done);
3653 }
3654
3655 #endif // INCLUDE_ALL_GCS
3971 code_section()->relocate(inst_mark(), rtype);
3972 ldrw(zr, Address(r, off));
3973 return inst_mark();
3974 }
3975
// Emit a 32-bit load (ldrw) from the address held in r, with the result
// discarded into zr, and attach a relocation of type rtype at inst_mark().
//
// r:     register holding the address to read (the polling page, going by
//        the function name).
// rtype: relocation type recorded for the load.
// Returns inst_mark(), the same position the relocation was attached to.
3976 address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
3977 InstructionMark im(this);
3978 code_section()->relocate(inst_mark(), rtype);
3979 ldrw(zr, Address(r, 0));
3980 return inst_mark();
3981 }
3982
// Load the 4K page address of dest into reg1 and return the remaining low
// 12 bits of the target through byte_offset.
//
// reg1:        destination register.
// dest:        address whose target() and rspec() are used; asserted to be a
//              literal-mode address with a valid AArch64 target.
// byte_offset: out-parameter, set to target & 0xfff. It is not added to reg1
//              here, so the caller has to apply it.
//
// A plain _adrp is emitted only when the target page lies within (1<<20)
// pages of both ends of the code cache. Otherwise an _adrp/movk pair is
// emitted.
3983 void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
     // NOTE(review): rtype is never read in this function. Confirm the
     // accessor chain has no side effects before removing it.
3984 relocInfo::relocType rtype = dest.rspec().reloc()->type();
     // Page numbers (address >> 12) of the code cache bounds and the target.
3985 unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12;
3986 unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12;
3987 unsigned long dest_page = (unsigned long)dest.target() >> 12;
     // Signed page distances from the lowest and highest code cache pages.
3988 long offset_low = dest_page - low_page;
3989 long offset_high = dest_page - high_page;
3990
     // Preconditions on dest, checked in assert-enabled builds.
3991 assert(is_valid_AArch64_address(dest.target()), "bad address");
3992 assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
3993
3994 InstructionMark im(this);
3995 code_section()->relocate(inst_mark(), dest.rspec());
3996 // 8143067: Ensure that the adrp can reach the dest from anywhere within
3997 // the code cache so that if it is relocated we know it will still reach
3998 if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
3999 _adrp(reg1, dest.target());
4000 } else {
     // Out of range: keep only the low 20 bits of the page delta from the
     // current pc, so that bits 12..31 of pc()+offset match the target. The
     // movk then supplies the target bits from bit 32 upwards. The value is
     // passed unmasked, presumably relying on the validity assert above to
     // keep it within 16 bits -- TODO confirm.
4001 unsigned long pc_page = (unsigned long)pc() >> 12;
4002 long offset = dest_page - pc_page;
4003 offset = (offset & ((1<<20)-1)) << 12;
4004 _adrp(reg1, pc()+offset);
4005 movk(reg1, (unsigned long)dest.target() >> 32, 32);
4006 }
     // Low 12 bits of the target, which a page address cannot represent.
4007 byte_offset = (unsigned long)dest.target() & 0xfff;
4008 }
4009
// Emit code that loads the card table's byte_map_base value into reg.
//
// reg: destination register.
//
// If the value passes is_valid_AArch64_address() it is loaded with adrp,
// which additionally requires its low 12 bits to be zero (asserted).
// Otherwise it is loaded as a plain 64-bit immediate with mov.
4010 void MacroAssembler::load_byte_map_base(Register reg) {
     // The barrier set is cast directly to CardTableModRefBS*; no kind check
     // is made here.
4011 jbyte *byte_map_base =
4012 ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base;
4013
4014 if (is_valid_AArch64_address((address)byte_map_base)) {
4015 // Strictly speaking the byte_map_base isn't an address at all,
4016 // and it might even be negative.
     // (That is why the value is checked before adrp is used, and why the
     // else branch exists.)
4017 unsigned long offset;
4018 adrp(reg, ExternalAddress((address)byte_map_base), offset);
4019 assert(offset == 0, "misaligned card table base");
4020 } else {
4021 mov(reg, (uint64_t)byte_map_base);
4022 }
4023 }
4024
// Emit a frame-building sequence: sp is lowered by framesize in total and
// rfp/lr are stored as a pair in the two words just below the incoming sp.
// When PreserveFramePointer is set, rfp is pointed at that saved pair.
//
// framesize: total frame size; must be > 0 (asserted).
//
// One of three sequences is chosen by size. The thresholds presumably follow
// the stp offset and sub immediate encoding limits -- TODO confirm.
4025 void MacroAssembler::build_frame(int framesize) {
4026 assert(framesize > 0, "framesize must be > 0");
4027 if (framesize < ((1 << 9) + 2 * wordSize)) {
     // Small frame: drop sp once, then store rfp/lr at the top of the frame.
4028 sub(sp, sp, framesize);
4029 stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4030 if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
4031 } else {
     // Larger frame: store rfp/lr first with pre(sp, ...) addressing, then
     // lower sp by the remaining framesize - 2 * wordSize.
4032 stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
4033 if (PreserveFramePointer) mov(rfp, sp);
4034 if (framesize < ((1 << 12) + 2 * wordSize))
4035 sub(sp, sp, framesize - 2 * wordSize);
4036 else {
     // The remainder is materialised in rscratch1 first; rscratch1 is
     // clobbered on this path.
4037 mov(rscratch1, framesize - 2 * wordSize);
4038 sub(sp, sp, rscratch1);
4039 }
4040 }
4041 }
4042
|