--- old/src/cpu/aarch32/vm/assembler_aarch32.cpp 2016-08-26 13:07:32.000000000 +0300 +++ new/src/cpu/aarch32/vm/assembler_aarch32.cpp 2016-08-26 13:07:32.000000000 +0300 @@ -1375,7 +1375,7 @@ if (rtype == relocInfo::none) __ mov(r, target()); else - __ movptr(r, (u_int32_t)target()); + __ movptr(r, (uint32_t)target()); break; } default: @@ -1693,10 +1693,10 @@ } //This should really be in the macroassembler -void Assembler::mov_immediate32(Register dst, u_int32_t imm32, Condition cond, bool s) +void Assembler::mov_immediate32(Register dst, uint32_t imm32, Condition cond, bool s) { - // Need to move a full 32 bit immediate, for example if we're loading an address that - // might change later and therefore need to be updated. + // Need to move a full 32 bit immediate, for example if we're loading an address that + // might change later and therefore need to be updated. if (VM_Version::features() & (FT_ARMV7 | FT_ARMV6T2)) { //Use a movw and a movt Assembler::movw_i(dst, (unsigned)(imm32 & 0xffff), cond); @@ -1735,7 +1735,7 @@ //Try plan B - a mov first - need to have destination that is not an arg assert(Rd != Rn, "Can't use imm and can't do a mov. I'm in a jam."); - mov_immediate(Rd, (u_int32_t)uabs(imm), cond, s); + mov_immediate(Rd, (uint32_t)uabs(imm), cond, s); //Now do the non immediate version - copied from the immediate encodings { starti; --- old/src/cpu/aarch32/vm/assembler_aarch32.hpp 2016-08-26 13:07:33.000000000 +0300 +++ new/src/cpu/aarch32/vm/assembler_aarch32.hpp 2016-08-26 13:07:33.000000000 +0300 @@ -874,10 +874,10 @@ protected: // Mov data to destination register in the shortest number of instructions // possible. - void mov_immediate(Register dst, u_int32_t imm32, Condition cond, bool s); + void mov_immediate(Register dst, uint32_t imm32, Condition cond, bool s); // Mov data to destination register but always emit enough instructions that would // permit any 32-bit constant to be loaded. (Allow for rewriting later). - void mov_immediate32(Register dst, u_int32_t imm32, Condition cond, bool s); + void mov_immediate32(Register dst, uint32_t imm32, Condition cond, bool s); void add_sub_imm(int decode, Register Rd, Register Rn, int imm, Condition cond, bool s); @@ -1210,7 +1210,7 @@ NAME(Rt, Address(r15_pc, offset), cond); \ } else if(isload){ /* Plan B */ \ /* TODO check we don't have to relocate this*/ \ - mov_immediate(Rt, (u_int32_t)dest, cond, false); \ + mov_immediate(Rt, (uint32_t)dest, cond, false); \ NAME(Rt, Address(Rt, 0), cond); \ } else { /* There is no plan C */ \ ShouldNotReachHere(); \ @@ -1596,7 +1596,7 @@ sync_instr(0b0100, option); } void dmb(enum barrier option) { - sync_instr(0b0100, option); + sync_instr(0b0101, option); } void bkpt(); void isb() { --- old/src/cpu/aarch32/vm/globals_aarch32.hpp 2016-08-26 13:07:34.000000000 +0300 +++ new/src/cpu/aarch32/vm/globals_aarch32.hpp 2016-08-26 13:07:34.000000000 +0300 @@ -67,8 +67,6 @@ define_pd_global(bool, RewriteBytecodes, true); define_pd_global(bool, RewriteFrequentPairs, true); -define_pd_global(bool, UseMembar, true); - define_pd_global(bool, PreserveFramePointer, false); // GC Ergo Flags @@ -81,9 +79,13 @@ define_pd_global(intx, InlineSmallCode, 1000); //#endif +// Define it as a constant instead of providing it as an option; inlining the +// constant significantly improves performance. The option is disabled for AARCH32 in globals.hpp too.
+#define UseMembar true + #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ - \ - product(bool, NearCpool, true, \ + \ + product(bool, NearCpool, true, \ "constant pool is close to instructions") \ \ product(bool, UseBarriersForVolatile, false, \ --- old/src/cpu/aarch32/vm/icBuffer_aarch32.cpp 2016-08-26 13:07:34.000000000 +0300 +++ new/src/cpu/aarch32/vm/icBuffer_aarch32.cpp 2016-08-26 13:07:34.000000000 +0300 @@ -35,7 +35,7 @@ #include "oops/oop.inline.hpp" int InlineCacheBuffer::ic_stub_code_size() { - return 5 * NativeInstruction::arm_insn_sz; + return (MacroAssembler::far_branches() ? 5 : 3) * NativeInstruction::arm_insn_sz; } #define __ masm-> --- old/src/cpu/aarch32/vm/macroAssembler_aarch32.cpp 2016-08-26 13:07:35.000000000 +0300 +++ new/src/cpu/aarch32/vm/macroAssembler_aarch32.cpp 2016-08-26 13:07:35.000000000 +0300 @@ -159,7 +159,7 @@ if(0b000 == opc2) { // movw, movt (only on newer ARMs) assert(nativeInstruction_at(&insn_buf[1])->is_movt(), "wrong insns in patch"); - u_int32_t addr; + uint32_t addr; addr = Instruction_aarch32::extract(insn_buf[1], 19, 16) << 28; addr |= Instruction_aarch32::extract(insn_buf[1], 11, 0) << 16; addr |= Instruction_aarch32::extract(insn_buf[0], 19, 16) << 12; @@ -170,7 +170,7 @@ assert(nativeInstruction_at(&insn_buf[1])->is_orr(), "wrong insns in patch"); assert(nativeInstruction_at(&insn_buf[2])->is_orr(), "wrong insns in patch"); assert(nativeInstruction_at(&insn_buf[3])->is_orr(), "wrong insns in patch"); - u_int32_t addr; + uint32_t addr; addr = Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[0], 11, 0)); addr |= Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[1], 11, 0)); addr |= Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[2], 11, 0)); @@ -209,12 +209,12 @@ } //Correct offset for PC offset -= 8; - return address(((u_int32_t)insn_addr + offset)); + return address(((uint32_t)insn_addr + offset)); } void MacroAssembler::serialize_memory(Register thread, Register tmp) { - dsb(Assembler::SY); + dmb(Assembler::ISH); } @@ -304,7 +304,7 @@ "destination of far call not found in code cache"); // TODO performance issue: if intented to patch later, // generate mov rX, imm; bl rX far call (to reserve space) - if (entry.rspec().type() != relocInfo::none || far_branches()) { + if (far_branches()) { lea(tmp, entry); if (cbuf) cbuf->set_insts_mark(); bl(tmp); @@ -318,9 +318,7 @@ assert(CodeCache::find_blob(entry.target()) != NULL, "destination of far call not found in code cache"); assert(!external_word_Relocation::is_reloc_index((intptr_t)entry.target()), "can't far jump to reloc index)"); - // TODO performance issue: if intented to patch later, - // generate mov rX, imm; bl rX far call (to reserve space) - if (entry.rspec().type() != relocInfo::none || far_branches()) { + if (far_branches()) { lea(tmp, entry); if (cbuf) cbuf->set_insts_mark(); b(tmp); @@ -591,7 +589,10 @@ ldr(rscratch2, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); Label ok; cbz(rscratch2, ok); + lea(rscratch2, RuntimeAddress(StubRoutines::forward_exception_entry())); + // forward_exception uses LR to choose exception handler but LR is trashed by previous code + // since we used to get here from interpreted code BL is acceptable way to acquire correct LR (see StubGenerator::generate_forward_exception) bl(rscratch2); bind(ok); } @@ -615,23 +616,23 @@ || entry.rspec().type() == relocInfo::static_call_type || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); - //FIXME This 
block - bool compile_in_scratch_emit_size = false; - #ifdef COMPILER2 - compile_in_scratch_emit_size = Compile::current()->in_scratch_emit_size(); - #endif - - if (cbuf) cbuf->set_insts_mark(); - relocate(entry.rspec()); - - // Have make trampline such way: destination address should be raw 4 byte value, - // so it's patching could be done atomically. - add(lr, r15_pc, NativeCall::instruction_size - 2 * NativeInstruction::arm_insn_sz); - ldr(r15_pc, Address(r15_pc, 4)); // Address does correction for offset from pc base - emit_int32((uintptr_t) entry.target()); - // possibly pad the call to the NativeCall size to make patching happy - for (int i = NativeCall::instruction_size; i > 3 * NativeInstruction::arm_insn_sz; i -= NativeInstruction::arm_insn_sz) - nop(); + if (cbuf) { + cbuf->set_insts_mark(); + } + + if (far_branches()) { + // Have make trampline such way: destination address should be raw 4 byte value, + // so it's patching could be done atomically. + relocate(entry.rspec()); + add(lr, r15_pc, NativeCall::instruction_size - 2 * NativeInstruction::arm_insn_sz); + ldr(r15_pc, Address(r15_pc, 4)); + emit_int32((uintptr_t) entry.target()); + // possibly pad the call to the NativeCall size to make patching happy + for (int i = NativeCall::instruction_size; i > 3 * NativeInstruction::arm_insn_sz; i -= NativeInstruction::arm_insn_sz) + nop(); + } else { + bl(entry); + } } void MacroAssembler::ic_call(address entry) { @@ -1741,23 +1742,7 @@ void MacroAssembler::store_check(Register obj) { // Does a store check for the oop in register obj. The content of // register obj is destroyed afterwards. - store_check_part_1(obj); - store_check_part_2(obj); -} -void MacroAssembler::store_check(Register obj, Address dst) { - store_check(obj); -} - - -// split the store check operation so that other instructions can be scheduled inbetween -void MacroAssembler::store_check_part_1(Register obj) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - lsr(obj, obj, CardTableModRefBS::card_shift); -} - -void MacroAssembler::store_check_part_2(Register obj) { BarrierSet* bs = Universe::heap()->barrier_set(); assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); CardTableModRefBS* ct = (CardTableModRefBS*)bs; @@ -1772,8 +1757,21 @@ // don't bother to check, but it could save an instruction. intptr_t disp = (intptr_t) ct->byte_map_base; mov(rscratch1, disp); - mov(rscratch2, 0); - strb(rscratch2, Address(obj, rscratch1)); + assert((disp & 0xff) == 0, "fix store char 0 below"); + strb(rscratch1, Address(rscratch1, obj, lsr((int) CardTableModRefBS::card_shift))); +} + +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + +// split the store check operation so that other instructions can be scheduled inbetween +void MacroAssembler::store_check_part_1(Register obj) { + ShouldNotCallThis(); +} + +void MacroAssembler::store_check_part_2(Register obj) { + ShouldNotCallThis(); } void MacroAssembler::load_klass(Register dst, Register src) { --- old/src/cpu/aarch32/vm/macroAssembler_aarch32.hpp 2016-08-26 13:07:36.000000000 +0300 +++ new/src/cpu/aarch32/vm/macroAssembler_aarch32.hpp 2016-08-26 13:07:35.000000000 +0300 @@ -159,19 +159,19 @@ // TODO: Do Address end up as address and then passing through this method, after // being marked for relocation elsewhere? If not (as I suspect) then this can // be relaxed to mov_immediate to potentially produce shorter code sequences. 
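For context on the TODO above, a minimal standalone sketch (assumed helper name, not HotSpot code): mov_immediate can use a single mov when the constant fits ARM's rotated-immediate encoding, whereas mov_immediate32 always emits the full movw/movt pair so the site stays patchable.
#include <stdint.h>
// True iff v is an 8-bit value rotated right by an even amount, i.e. encodable
// as a single ARM data-processing immediate and therefore loadable with one mov.
static bool fits_arm_rotated_imm(uint32_t v) {
  if ((v & ~0xffu) == 0) return true;                   // rotation of 0
  for (unsigned rot = 2; rot < 32; rot += 2) {
    uint32_t undone = (v << rot) | (v >> (32 - rot));   // undo a rotate-right by rot
    if ((undone & ~0xffu) == 0) return true;
  }
  return false;
}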
- mov_immediate32(dst, (u_int32_t)addr, cond, false); + mov_immediate32(dst, (uint32_t)addr, cond, false); } inline void mov(Register dst, long l, Condition cond = C_DFLT) { - mov(dst, (u_int32_t)l, cond); + mov(dst, (uint32_t)l, cond); } inline void mov(Register dst, unsigned long l, Condition cond = C_DFLT) { - mov(dst, (u_int32_t)l, cond); + mov(dst, (uint32_t)l, cond); } inline void mov(Register dst, int i, Condition cond = C_DFLT) { - mov(dst, (u_int32_t)i, cond); + mov(dst, (uint32_t)i, cond); } - inline void mov(Register dst, u_int32_t i, Condition cond = C_DFLT) { + inline void mov(Register dst, uint32_t i, Condition cond = C_DFLT) { mov_immediate(dst, i, cond, false); } @@ -590,9 +590,14 @@ void bang_stack_with_offset(int offset) { // stack grows down, caller passes positive offset assert(offset > 0, "must bang with negative offset"); - mov(rscratch2, -offset); - // bang with random number from r0 - str(r0, Address(sp, rscratch2)); + // bang with random value from r0 + if (operand_valid_for_add_sub_immediate(offset)) { + sub(rscratch2, sp, offset); + strb(r0, Address(rscratch2)); + } else { + mov(rscratch2, offset); + strb(r0, Address(sp, rscratch2, Assembler::lsl(), Address::SUB)); + } } // Writes to stack successive pages until offset reached to check for @@ -653,7 +658,11 @@ static int far_branch_size() { // TODO performance issue: always generate real far jumps - return 3 * 4; // movw, movt, br + if (far_branches()) { + return 3 * 4; // movw, movt, br + } else { + return 4; + } } // Emit the CompiledIC call idiom --- old/src/cpu/aarch32/vm/nativeInst_aarch32.cpp 2016-08-26 13:07:36.000000000 +0300 +++ new/src/cpu/aarch32/vm/nativeInst_aarch32.cpp 2016-08-26 13:07:36.000000000 +0300 @@ -92,10 +92,8 @@ // and see valid destination value) if (NativeImmCall::is_at(addr())) { - assert(false, "could be patched mt_safe way, but should not be requested to. " - "Known mt_safe requests have arbitrary destination offset. 
" - "Use trampoline_call for this."); - ShouldNotCallThis(); + NativeImmCall::from(addr())->set_destination(dest); + ICache::invalidate_word(addr()); } else if (NativeTrampolineCall::is_at(addr())) { NativeTrampolineCall::from(addr())->set_destination_mt_safe(dest); } else { --- old/src/cpu/aarch32/vm/relocInfo_aarch32.cpp 2016-08-26 13:07:37.000000000 +0300 +++ new/src/cpu/aarch32/vm/relocInfo_aarch32.cpp 2016-08-26 13:07:37.000000000 +0300 @@ -32,15 +32,16 @@ #include "runtime/safepoint.hpp" void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { - if (verify_only) - return; - - int bytes = 0; + int bytes; NativeInstruction *ni = NativeInstruction::from(addr()); if (ni->is_mov_const_reg()) { NativeMovConstReg *nm = NativeMovConstReg::from(addr()); - nm->set_data((uintptr_t) x); + if (verify_only) { + assert(nm->data() == (intptr_t) x, "instructions must match"); + return; + } + nm->set_data((intptr_t) x); bytes = nm->next_instruction_address() - nm->addr(); } else { ShouldNotReachHere(); @@ -59,24 +60,34 @@ NativeInstruction *ni = NativeInstruction::from(addr()); - if (ni->is_call()) { + // Checking from shortest encoding size to longets, + // to avoid access beyond CodeCache boundary + if (NativeImmCall::is_at(addr())) { + return NativeImmCall::from(addr())->destination() + adj; + } else if (NativeImmJump::is_at(addr())) { + return NativeImmJump::from(addr())->destination() + adj; + } else if (NativeCall::is_at(addr())) { return NativeCall::from(addr())->destination(); - } else if (ni->is_jump()) { + } else if (NativeJump::is_at(addr())) { return NativeJump::from(addr())->jump_destination(); } ShouldNotReachHere(); - - return NULL; } void Relocation::pd_set_call_destination(address x) { assert(addr() != x, "call instruction in an infinite loop"); // FIXME what's wrong to _generate_ loop? 
NativeInstruction *ni = NativeInstruction::from(addr()); - if (ni->is_call()) { + // Checking from shortest encoding size to longets, + // to avoid access beyond CodeCache boundary + if (NativeImmCall::is_at(addr())) { + NativeImmCall::from(addr())->set_destination(x); + } else if (NativeImmJump::is_at(addr())) { + NativeImmJump::from(addr())->set_destination(x); + } else if (NativeCall::is_at(addr())) { NativeCall::from(addr())->set_destination(x); - } else if (ni->is_jump()) { + } else if (NativeJump::is_at(addr())) { NativeJump::from(addr())->set_jump_destination(x); } else { ShouldNotReachHere(); --- old/src/cpu/aarch32/vm/sharedRuntime_aarch32.cpp 2016-08-26 13:07:37.000000000 +0300 +++ new/src/cpu/aarch32/vm/sharedRuntime_aarch32.cpp 2016-08-26 13:07:37.000000000 +0300 @@ -1046,11 +1046,13 @@ } } -static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { +static int save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { RegSet x; + int saved_slots = 0; for ( int i = first_arg ; i < arg_count ; i++ ) { if (args[i].first()->is_Register()) { x = x + args[i].first()->as_Register(); + ++saved_slots; } else if (args[i].first()->is_FloatRegister()) { FloatRegister fr = args[i].first()->as_FloatRegister(); @@ -1058,13 +1060,16 @@ assert(args[i].is_single_phys_reg(), "doubles should be 2 consequents float regs"); __ decrement(sp, 2 * wordSize); __ vstr_f64(fr, Address(sp)); + saved_slots += 2; } else { __ decrement(sp, wordSize); __ vstr_f32(fr, Address(sp)); + ++saved_slots; } } } __ push(x, sp); + return saved_slots; } static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { @@ -1765,7 +1770,7 @@ { SkipIfEqual skip(masm, &DTraceMethodProbes, false); // protect the args we've loaded - save_args(masm, total_c_args, c_arg, out_regs); + (void) save_args(masm, total_c_args, c_arg, out_regs); __ mov_metadata(c_rarg1, method()); __ call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), @@ -1777,7 +1782,7 @@ // RedefineClasses() tracing support for obsolete method entry if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { // protect the args we've loaded - save_args(masm, total_c_args, c_arg, out_regs); + (void) save_args(masm, total_c_args, c_arg, out_regs); __ mov_metadata(c_rarg1, method()); __ call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), @@ -1794,11 +1799,44 @@ if (method->is_synchronized()) { assert(!is_critical_native, "unhandled"); - // TODO Fast path disabled as requires at least 4 registers, which already contain arguments prepared for call + + // registers below are not used to pass parameters + // and they are caller save in C1 + // => safe to use as temporary here +#ifdef COMPILER2 + stop("fix temporary register set below"); +#endif + const Register swap_reg = r5; + const Register obj_reg = r6; // Will contain the oop + const Register lock_reg = r7; // Address of compiler lock object (BasicLock) + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); // Get the handle (the 2nd argument) __ mov(oop_handle_reg, c_rarg1); - __ b(slow_path_lock); + + // Get address of the box + + __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + + // Load the oop from the handle + __ ldr(obj_reg, Address(oop_handle_reg, 0)); + + if (UseBiasedLocking) { + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, lock_done, &slow_path_lock); + } + + // Load (object->mark() | 1) into swap_reg 
%r0 + __ ldr(swap_reg, Address(obj_reg, 0)); + __ orr(swap_reg, swap_reg, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + __ str(swap_reg, Address(lock_reg, mark_word_offset)); + + // src -> dest iff dest == r0 else r0 <- dest + { Label here; + __ cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, lock_done, &slow_path_lock); + } // Slow path will re-enter here __ bind(lock_done); @@ -1856,7 +1894,7 @@ if(os::is_MP()) { if (UseMembar) { // Force this write out before the read below - __ dmb(Assembler::SY); + __ membar(Assembler::AnyAny); } else { // Write serialization page so VM thread can do a pseudo remote membar. // We use the current thread pointer to calculate a thread specific @@ -1929,8 +1967,29 @@ Label unlock_done; Label slow_path_unlock; if (method->is_synchronized()) { - // TODO fast path disabled as requires at least 4 registers, but r0,r1 contains result - __ b(slow_path_unlock); + const Register obj_reg = r2; // Will contain the oop + const Register lock_reg = rscratch1; // Address of compiler lock object (BasicLock) + const Register old_hdr = r3; // value of old header at unlock time + + // Get locked oop from the handle we passed to jni + __ ldr(obj_reg, Address(oop_handle_reg, 0)); + + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, old_hdr, unlock_done); + } + + // Simple recursive lock? + // get address of the stack lock + __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + + // get old displaced header + __ ldr(old_hdr, Address(lock_reg, 0)); + __ cbz(old_hdr, unlock_done); + + // Atomic swap old header if oop still contains the stack lock + Label succeed; + __ cmpxchgptr(lock_reg, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); + __ bind(succeed); // slow path re-enters here __ bind(unlock_done); @@ -1997,10 +2056,10 @@ // args are (oop obj, BasicLock* lock, JavaThread* thread) // protect the args we've loaded - save_args(masm, total_c_args, c_arg, out_regs); + const int extra_words = save_args(masm, total_c_args, c_arg, out_regs); __ ldr(c_rarg0, Address(oop_handle_reg)); - __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ lea(c_rarg1, Address(sp, (extra_words + lock_slot_offset) * VMRegImpl::stack_slot_size)); __ mov(c_rarg2, rthread); // Not a leaf but we have last_Java_frame setup as we want --- old/src/cpu/aarch32/vm/stubGenerator_aarch32.cpp 2016-08-26 13:07:38.000000000 +0300 +++ new/src/cpu/aarch32/vm/stubGenerator_aarch32.cpp 2016-08-26 13:07:38.000000000 +0300 @@ -1216,7 +1216,7 @@ // if they expect all registers to be preserved. // n.b. 
aarch32 asserts that frame::arg_reg_save_area_bytes == 0 enum layout { - rfp_off = frame::arg_reg_save_area_bytes/BytesPerInt, + rfp_off = 0, return_off, framesize // inclusive of return address }; @@ -1237,9 +1237,6 @@ __ enter(); // Save FP and LR before call - // lr and fp are already in place - assert(frame::arg_reg_save_area_bytes == 0, "please modify this code"); - // __ sub(sp, rfp, frame::arg_reg_save_area_bytes + wordSize); // prolog assert(is_even(framesize), "sp not 8-byte aligned"); int frame_complete = __ pc() - start; @@ -1288,7 +1285,7 @@ RuntimeStub::new_runtime_stub(name, &code, frame_complete, - (framesize >> (LogBytesPerWord - LogBytesPerInt)), + framesize, oop_maps, false); return stub->entry_point(); } --- old/src/cpu/aarch32/vm/templateInterpreter_aarch32.cpp 2016-08-26 13:07:39.000000000 +0300 +++ new/src/cpu/aarch32/vm/templateInterpreter_aarch32.cpp 2016-08-26 13:07:38.000000000 +0300 @@ -142,6 +142,12 @@ // expression stack must be empty before entering the VM if an // exception happened __ empty_expression_stack(); + // FIXME shouldn't this be in the rest of the generate_* methods? + // rdispatch is assumed to cache the dispatch table. This code can be called from + // a signal handler, so it cannot assume the code that raised the exception preserved + // the register; restore it here + __ get_dispatch(); + // FIXME shouldn't get_method be here? // setup parameters __ lea(c_rarg1, Address((address)name)); if (pass_oop) { @@ -508,7 +514,9 @@ __ b(after_frame_check, Assembler::HI); // Remove the incoming args, peeling the machine SP back to where it - // was in the caller. + // was in the caller. This is not strictly necessary, but unless we + // do so the stack frame may have a garbage FP; this ensures a + // correct call stack that we can always unwind. __ mov(sp, r4); // Note: the restored frame is not necessarily interpreted. @@ -1039,7 +1047,7 @@ if (os::is_MP()) { if (UseMembar) { // Force this write out before the read below - __ dsb(Assembler::SY); + __ membar(Assembler::AnyAny); } else { // Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific @@ -1992,17 +2000,16 @@ address TemplateInterpreterGenerator::generate_trace_code(TosState state) { address entry = __ pc(); - __ push(lr); __ push(state); - __ push(RegSet::range(r0, r12), sp); + // Save all registers on stack, so omit SP and PC + __ push(RegSet::range(r0, r12) + lr, sp); __ mov(c_rarg2, r0); // Pass itos __ mov(c_rarg3, r1); // Pass ltos/dtos high part __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); - __ pop(RegSet::range(r0, r12), sp); + __ pop(RegSet::range(r0, r12) + lr, sp); __ pop(state); - __ pop(lr); __ b(lr); // return from result handler return entry; --- old/src/share/vm/c1/c1_LIR.hpp 2016-08-26 13:07:39.000000000 +0300 +++ new/src/share/vm/c1/c1_LIR.hpp 2016-08-26 13:07:39.000000000 +0300 @@ -619,7 +619,7 @@ LIR_OprDesc::single_size); } #if defined(C1_LIR_MD_HPP) # include C1_LIR_MD_HPP -#elif defined(SPARC) +#elif defined(SPARC) || defined(AARCH32) static LIR_Opr double_fpu(int reg1, int reg2) { return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | (reg2 << LIR_OprDesc::reg2_shift) | LIR_OprDesc::double_type | --- old/src/share/vm/c1/c1_LinearScan.cpp 2016-08-26 13:07:40.000000000 +0300 +++ new/src/share/vm/c1/c1_LinearScan.cpp 2016-08-26 13:07:40.000000000 +0300 @@ -2141,7 +2141,7 @@ assert(interval->assigned_regHi() >= pd_first_fpu_reg && interval->assigned_regHi() <= pd_last_fpu_reg, "no fpu register"); assert(assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi(), "must be sequential and even"); LIR_Opr result = LIR_OprFact::double_fpu(interval->assigned_regHi() - pd_first_fpu_reg, assigned_reg - pd_first_fpu_reg); -#elif defined(ARM32) +#elif defined(ARM32) || defined(AARCH32) assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register"); assert(interval->assigned_regHi() >= pd_first_fpu_reg && interval->assigned_regHi() <= pd_last_fpu_reg, "no fpu register"); assert(assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi(), "must be sequential and even"); @@ -2730,9 +2730,9 @@ #ifdef SPARC assert(opr->fpu_regnrLo() == opr->fpu_regnrHi() + 1, "assumed in calculation (only fpu_regnrHi is used)"); #endif -#ifdef ARM32 +#if defined(ARM32) || defined(AARCH32) assert(opr->fpu_regnrHi() == opr->fpu_regnrLo() + 1, "assumed in calculation (only fpu_regnrLo is used)"); -#endif +#endif // ARM32 || AARCH32 #ifdef PPC assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation (only fpu_regnrHi is used)"); #endif --- old/src/share/vm/c1/c1_Runtime1.cpp 2016-08-26 13:07:41.000000000 +0300 +++ new/src/share/vm/c1/c1_Runtime1.cpp 2016-08-26 13:07:41.000000000 +0300 @@ -1049,7 +1049,7 @@ ShouldNotReachHere(); } -#if defined(SPARC) || defined(PPC) +#if defined(SPARC) || defined(PPC) || defined(AARCH32) if (load_klass_or_mirror_patch_id || stub_id == Runtime1::load_appendix_patching_id) { // Update the location in the nmethod with the proper @@ -1086,7 +1086,7 @@ if (do_patch) { // replace instructions // first replace the tail, then the call -#ifdef ARM +#if defined(ARM) && !defined(AARCH32) if((load_klass_or_mirror_patch_id || stub_id == Runtime1::load_appendix_patching_id) && nativeMovConstReg_at(copy_buff)->is_pc_relative()) { @@ -1134,12 +1134,14 @@ nmethod* nm = CodeCache::find_nmethod(instr_pc); assert(nm != NULL, "invalid nmethod_pc"); +#if !defined(AARCH32) // The old patch site is now a move instruction so update // the reloc info so that it will get 
updated during // future GCs. RelocIterator iter(nm, (address)instr_pc, (address)(instr_pc + 1)); relocInfo::change_reloc_info_for_address(&iter, (address) instr_pc, relocInfo::none, rtype); +#endif #ifdef SPARC // Sparc takes two relocations for an metadata so update the second one. address instr_pc2 = instr_pc + NativeMovConstReg::add_offset; --- old/src/share/vm/runtime/globals.hpp 2016-08-26 13:07:41.000000000 +0300 +++ new/src/share/vm/runtime/globals.hpp 2016-08-26 13:07:41.000000000 +0300 @@ -536,8 +536,9 @@ /* UseMembar is theoretically a temp flag used for memory barrier \ * removal testing. It was supposed to be removed before FCS but has \ * been re-added (see 6401008) */ \ + NOT_AARCH32( \ product_pd(bool, UseMembar, \ - "(Unstable) Issues membars on thread state transitions") \ + "(Unstable) Issues membars on thread state transitions")) \ \ develop(bool, CleanChunkPoolAsync, falseInEmbedded, \ "Clean the chunk pool asynchronously") \ --- /dev/null 2016-08-26 13:07:43.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_CodeStubs_aarch32.cpp 2016-08-26 13:07:43.000000000 +0300 @@ -0,0 +1,533 @@ +/* + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. 
+ +#include "precompiled.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "nativeInst_aarch32.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch32.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#endif + +#define __ ce->masm()-> + +#define should_not_reach_here() should_not_reach_here_line(__FILE__, __LINE__) + +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_method->as_register(), 1); + ce->store_parameter(_bci, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, + bool throw_index_out_of_bounds_exception) + : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception) + , _index(index) +{ + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); + return; + } + + if (_index->is_cpu_register()) { + __ mov(rscratch1, _index->as_register()); + } else { + __ mov(rscratch1, _index->as_jint()); + } + Runtime1::StubID stub_id; + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { + stub_id = Runtime1::throw_range_check_failed_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(stub_id)), NULL, rscratch2); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } + __ bind(_entry); + __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); +#ifdef ASSERT + __ should_not_reach_here(); +#endif +} + + + +// Implementation of NewInstanceStub + +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + + + +void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mov(r3, _klass_reg->as_register()); + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id))); + ce->add_call_info_here(_info); + 
ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0,"); + __ b(_continuation); +} + + +// Implementation of NewTypeArrayStub + +// Implementation of NewTypeArrayStub + +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + + +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == r6, "length must in r6,"); + assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0"); + __ b(_continuation); +} + + +// Implementation of NewObjectArrayStub + +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + + +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == r6, "length must in r6"); + assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0"); + __ b(_continuation); +} +// Implementation of MonitorAccessStubs + +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) +: MonitorAccessStub(obj_reg, lock_reg) +{ + _info = new CodeEmitInfo(info); +} + + +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); + ce->store_parameter(_lock_reg->as_register(), 0); + Runtime1::StubID enter_id; + if (ce->compilation()->has_fpu_code()) { + enter_id = Runtime1::monitorenter_id; + } else { + enter_id = Runtime1::monitorenter_nofpu_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + + +void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it + ce->monitor_address(_monitor_ix, _lock_reg); + } + ce->store_parameter(_lock_reg->as_register(), 0); + // note: non-blocking leaf routine => no call info needed + Runtime1::StubID exit_id; + if (ce->compilation()->has_fpu_code()) { + exit_id = Runtime1::monitorexit_id; + } else { + exit_id = Runtime1::monitorexit_nofpu_id; + } + __ adr(lr, _continuation); + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + + +// Implementation of patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the stub +// At Runtime: +// - call to stub, jump to runtime +// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + +int PatchingStub::_patch_info_offset = 0; + +void 
PatchingStub::align_patch_site(MacroAssembler* masm) { +} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + // NativeCall::instruction_size is dynamically calculated based on CPU, + // armv7 -> 3 instructions, armv6 -> 5 instructions. Initialize _patch_info_offset + // here, once the CPU has been determined. + if (!_patch_info_offset) + _patch_info_offset = -NativeCall::instruction_size; + assert(_patch_info_offset == -NativeCall::instruction_size, "must not change"); + assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, "not enough room for call"); + + Label call_patch; + + // static field accesses have special semantics while the class + // initializer is being run so we emit a test which can be used to + // check that this code is being executed by the initializing + // thread. + address being_initialized_entry = __ pc(); + if (CommentedAssembly) { + __ block_comment(" patch template"); + } + if (_id == load_klass_id) { + // produce a copy of the load klass instruction for use by the being initialized case +#ifdef ASSERT + address start = __ pc(); +#endif + Metadata* o = NULL; + __ mov_metadata(_obj, o); + __ nop(); // added to call site by LIR_Assembler::patching_epilog +#ifdef ASSERT + for (int i = 0; i < _bytes_to_copy; i++) { + address ptr = (address)(_pc_start + i); + int a_byte = (*ptr) & 0xFF; + assert(a_byte == *start++, "should be the same code"); + } +#endif + } else if (_id == load_mirror_id || _id == load_appendix_id) { + // produce a copy of the load mirror instruction for use by the being + // initialized case +#ifdef ASSERT + address start = __ pc(); +#endif + jobject o = NULL; + __ movoop(_obj, o, true); + __ nop(); // added to call site by LIR_Assembler::patching_epilog +#ifdef ASSERT + for (int i = 0; i < _bytes_to_copy; i++) { + address ptr = (address)(_pc_start + i); + int a_byte = (*ptr) & 0xFF; + assert(a_byte == *start++, "should be the same code"); + } +#endif + } else { + // make a copy of the code which is going to be patched. + assert(_bytes_to_copy % BytesPerWord == 0, "all instructions are 4 bytes"); + assert(((unsigned long) _pc_start) % BytesPerWord == 0, "patch offset should be aligned"); + const int words_to_copy = _bytes_to_copy / BytesPerWord; + for (int i = 0; i < words_to_copy; i++) { + int *ptr = ((int *) _pc_start) + i; + __ emit_int32(*ptr); + *ptr = 0xe320f000; // make the site look like a nop + } + } + + int bytes_to_skip = _bytes_to_copy; + + // this switch will be patched by NativeGeneralJump::replace_mt_safe; + // it is intended to distinguish entry via being_initialized_entry from + // entry via the call site + int switch_offset = __ offset(); + Label patching_switch; + __ b(patching_switch); + __ bind(patching_switch); + bytes_to_skip += __ offset() - switch_offset; + + if (_id == load_mirror_id) { + int offset = __ offset(); + if (CommentedAssembly) { + __ block_comment(" being_initialized check"); + } + assert(_obj != noreg, "must be a valid register"); + // Load without verification to keep code size small. We need it because + // being_initialized_entry_offset has to fit in a byte. Also, we know it's not null. + __ ldr(rscratch1, Address(_obj, java_lang_Class::klass_offset_in_bytes())); + __ ldr(rscratch1, Address(rscratch1, InstanceKlass::init_thread_offset())); + __ cmp(rthread, rscratch1); + __ b(call_patch, Assembler::NE); + + // access_field patches may execute the patched code before it's + // copied back into place so we need to jump back into the main + // code of the nmethod to continue execution.
+ __ b(_patch_site_continuation); + // make sure this extra code gets skipped + bytes_to_skip += __ offset() - offset; + } + + // Now emit the patch record telling the runtime how to find the + // pieces of the patch. We only need 3 bytes but it has to be + // aligned as an instruction so emit 4 bytes. + int sizeof_patch_record = 4; + bytes_to_skip += sizeof_patch_record; + + // emit the offsets needed to find the code to patch + int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record; + + __ emit_int8(0); + __ emit_int8(being_initialized_entry_offset); + __ emit_int8(bytes_to_skip); + __ emit_int8(0); + + address patch_info_pc = __ pc(); + + address entry = __ pc(); + NativeGeneralJump::insert_unconditional((address)_pc_start, entry); + address target = NULL; + relocInfo::relocType reloc_type = relocInfo::none; + switch (_id) { + case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); break; + case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break; + case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break; + case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break; + default: ShouldNotReachHere(); + } + __ bind(call_patch); + + if (CommentedAssembly) { + __ block_comment("patch entry point"); + } + __ mov(rscratch1, RuntimeAddress(target)); + __ bl(rscratch1); + // pad with nops to globally known upper bound of patch site size + while (patch_info_pc - __ pc() < _patch_info_offset) + __ nop(); + assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change, required by shared code"); + ce->add_call_info_here(_info); + int jmp_off = __ offset(); + __ b(_patch_site_entry); + // Add enough nops so deoptimization can overwrite the jmp above with a call + // and not destroy the world. + for (int j = __ offset() ; j < jmp_off + NativeCall::instruction_size; j += NativeInstruction::arm_insn_sz) { + __ nop(); + } + + if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) { + CodeSection* cs = __ code_section(); + RelocIterator iter(cs, (address)_pc_start, (address)(_pc_start + 1)); + relocInfo::change_reloc_info_for_address(&iter, (address) _pc_start, reloc_type, relocInfo::none); + } +} + + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. 
+ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); + // pass the object in a scratch register because all other registers + // must be preserved + if (_obj->is_cpu_register()) { + __ mov(rscratch1, _obj->as_register()); + } + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, rscratch2); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + //---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. + // + VMRegPair args[5]; + BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; + SharedRuntime::java_calling_convention(signature, args, 5, true); + + // push parameters + // (src, src_pos, dest, destPos, length) + Register r[5]; + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int i = 0; i < 5 ; i++ ) { + VMReg r_1 = args[i].first(); + if (r_1->is_stack()) { + int st_off = r_1->reg2stack() * wordSize; + __ str (r[i], Address(sp, st_off)); + } else { + assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); + } + } + + ce->align_call(lir_static_call); + + ce->emit_static_call_stub(); + Address resolve(SharedRuntime::get_resolve_static_call_stub(), + relocInfo::static_call_type); + __ trampoline_call(resolve); + ce->add_call_info_here(info()); + +#ifndef PRODUCT + __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + __ increment(Address(rscratch2)); +#endif + + __ b(_continuation); +} + + +///////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. 
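For context, a conceptual outline of the SATB pre-barrier that this slow-path stub completes; the identifiers below are illustrative, not the HotSpot ones:
  // if (thread_satb_marking_active) {        // fast-path check emitted inline, before the stub
  //   pre_val = *field;                      // previous value of the field (the do_load() case)
  //   if (pre_val != NULL)
  //     satb_enqueue(pre_val);               // what the g1_pre_barrier_slow_id runtime call does
  // }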
+ + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + if (do_load()) { + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/); + } + __ cbz(pre_val_reg, _continuation); + ce->store_parameter(pre_val()->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); + __ b(_continuation); +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ cbz(new_val_reg, _continuation); + ce->store_parameter(addr()->as_pointer_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id))); + __ b(_continuation); +} + +#endif // INCLUDE_ALL_GCS +///////////////////////////////////////////////////////////////////////////// + +#undef __ --- /dev/null 2016-08-26 13:07:44.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_Defs_aarch32.hpp 2016-08-26 13:07:44.000000000 +0300 @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#ifndef CPU_AARCH32_VM_C1_DEFS_AARCH32_HPP +#define CPU_AARCH32_VM_C1_DEFS_AARCH32_HPP + +// Native word offsets from memory address (little endian format) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// TODO: We should understand what values are correct for the following 3 flags +// relevant to floating point operations: +// - UseSSE +// Highest supported SSE instruction set on x86/x64. I believe we should +// set it to 0 in VM_Version::initialize(), like other non-x86 ports do. 
+// - RoundFPResults +// Indicates whether rounding is needed for floating point results +// - pd_strict_fp_requires_explicit_rounding +// The same as above but for the strictfp mode + +// Explicit rounding operations are not required to implement the strictfp mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +// Registers +enum { + // Number of registers used during code emission + pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, + pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, + + // Number of registers killed by calls + pd_nof_caller_save_cpu_regs_frame_map = 8, + pd_nof_caller_save_fpu_regs_frame_map = 32, + + // The following two constants need to be defined since they are referenced + // from c1_FrameMap.hpp, but actually they are never used, so can be set to + // arbitrary values. + pd_nof_cpu_regs_reg_alloc = -1, + pd_nof_fpu_regs_reg_alloc = -1, + + // All the constants below are used by linear scan register allocator only. + // Number of registers visible to register allocator + pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map, + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, + pd_nof_xmm_regs_linearscan = 0, + + // Register allocator specific register numbers corresponding to first/last + // CPU/FPU registers available for allocation + pd_first_cpu_reg = 0, + pd_last_cpu_reg = 7, + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + 31, + + // Register allocator specific register numbers corresponding to first/last + // CPU/FPU callee-saved registers. These constants are used in + // LinearScan::is_caller_save() only. + pd_first_callee_saved_cpu_reg = 4, + pd_last_callee_saved_cpu_reg = 11, + pd_first_callee_saved_fpu_reg = pd_first_fpu_reg + 16, + pd_last_callee_saved_fpu_reg = pd_first_fpu_reg + 31 +}; + +// This flag must be in sync with how the floating point registers are stored +// on the stack by RegisterSaver::save_live_registers() method +// (sharedRuntime_aarch32.cpp) and save_live_registers() function +// (c1_Runtime1_aarch32.cpp). On AArch32 the floating point registers keep +// floats and doubles in their native form. No float to double conversion +// happens when the registers are stored on the stack. This is opposite to +// what happens on x86, where the FPU stack registers are 80 bits wide, +// and storing them in either 4 byte or 8 byte stack slot is a conversion +// operation. +enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_AARCH32_VM_C1_DEFS_AARCH32_HPP --- /dev/null 2016-08-26 13:07:46.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_FpuStackSim_aarch32.cpp 2016-08-26 13:07:46.000000000 +0300 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FpuStackSim.hpp" + +// No FPU stack on AArch32 --- /dev/null 2016-08-26 13:07:47.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_FpuStackSim_aarch32.hpp 2016-08-26 13:07:47.000000000 +0300 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_C1_FPUSTACKSIM_AARCH32_HPP +#define CPU_AARCH32_VM_C1_FPUSTACKSIM_AARCH32_HPP + +// No FPU stack on AArch32 + +#endif // CPU_AARCH32_VM_C1_FPUSTACKSIM_AARCH32_HPP --- /dev/null 2016-08-26 13:07:49.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_FrameMap_aarch32.cpp 2016-08-26 13:07:49.000000000 +0300 @@ -0,0 +1,258 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. 
+// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch32.inline.hpp" + +LIR_Opr FrameMap::r0_opr; +LIR_Opr FrameMap::r1_opr; +LIR_Opr FrameMap::r2_opr; +LIR_Opr FrameMap::r3_opr; +LIR_Opr FrameMap::r4_opr; +LIR_Opr FrameMap::r5_opr; +LIR_Opr FrameMap::r6_opr; +LIR_Opr FrameMap::r7_opr; +LIR_Opr FrameMap::r8_opr; +LIR_Opr FrameMap::r9_opr; +LIR_Opr FrameMap::r10_opr; +LIR_Opr FrameMap::r11_opr; +LIR_Opr FrameMap::r12_opr; +LIR_Opr FrameMap::r13_opr; +LIR_Opr FrameMap::r14_opr; +LIR_Opr FrameMap::r15_opr; + +LIR_Opr FrameMap::r0_oop_opr; +LIR_Opr FrameMap::r1_oop_opr; +LIR_Opr FrameMap::r2_oop_opr; +LIR_Opr FrameMap::r3_oop_opr; +LIR_Opr FrameMap::r4_oop_opr; +LIR_Opr FrameMap::r5_oop_opr; +LIR_Opr FrameMap::r6_oop_opr; +LIR_Opr FrameMap::r7_oop_opr; +LIR_Opr FrameMap::r8_oop_opr; +LIR_Opr FrameMap::r9_oop_opr; +LIR_Opr FrameMap::r10_oop_opr; +LIR_Opr FrameMap::r11_oop_opr; +LIR_Opr FrameMap::r12_oop_opr; +LIR_Opr FrameMap::r13_oop_opr; +LIR_Opr FrameMap::r14_oop_opr; +LIR_Opr FrameMap::r15_oop_opr; + +LIR_Opr FrameMap::r0_metadata_opr; +LIR_Opr FrameMap::r1_metadata_opr; +LIR_Opr FrameMap::r2_metadata_opr; +LIR_Opr FrameMap::r3_metadata_opr; +LIR_Opr FrameMap::r4_metadata_opr; +LIR_Opr FrameMap::r5_metadata_opr; + +LIR_Opr FrameMap::sp_opr; +LIR_Opr FrameMap::receiver_opr; + +LIR_Opr FrameMap::rscratch1_opr; +LIR_Opr FrameMap::rscratch2_opr; +LIR_Opr FrameMap::rscratch_long_opr; + +LIR_Opr FrameMap::long0_opr; +LIR_Opr FrameMap::long1_opr; +LIR_Opr FrameMap::long2_opr; +LIR_Opr FrameMap::fpu0_float_opr; +LIR_Opr FrameMap::fpu0_double_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + +void FrameMap::initialize() { + assert(!_init_done, "must be called once"); + + int i = 0; + map_register(i, r0); r0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r1); r1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r2); r2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r3); r3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r4); r4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r5); r5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r6); r6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r7); r7_opr = LIR_OprFact::single_cpu(i); i++; + // Mapping lines in this block may be arbitrarily mixed, but all allocatable + // registers should go above this comment, and unallocatable registers - + // below. + map_register(i, r8); r8_opr = LIR_OprFact::single_cpu(i); i++; // rthread + map_register(i, r9); r9_opr = LIR_OprFact::single_cpu(i); i++; // rscratch1 + map_register(i, r10); r10_opr = LIR_OprFact::single_cpu(i); i++; // rmethod + map_register(i, r11); r11_opr = LIR_OprFact::single_cpu(i); i++; // rfp + map_register(i, r12); r12_opr = LIR_OprFact::single_cpu(i); i++; // rscratch2 + map_register(i, r13); r13_opr = LIR_OprFact::single_cpu(i); i++; // sp + map_register(i, r14); r14_opr = LIR_OprFact::single_cpu(i); i++; // lr + map_register(i, r15); r15_opr = LIR_OprFact::single_cpu(i); i++; // r15_pc + + // This flag must be set after all integer registers are mapped but before + // the first use of as_*_opr() methods. 
+ _init_done = true; + + r0_oop_opr = as_oop_opr(r0); + r1_oop_opr = as_oop_opr(r1); + r2_oop_opr = as_oop_opr(r2); + r3_oop_opr = as_oop_opr(r3); + r4_oop_opr = as_oop_opr(r4); + r5_oop_opr = as_oop_opr(r5); + r6_oop_opr = as_oop_opr(r6); + r7_oop_opr = as_oop_opr(r7); + r8_oop_opr = as_oop_opr(r8); + r9_oop_opr = as_oop_opr(r9); + r10_oop_opr = as_oop_opr(r10); + r11_oop_opr = as_oop_opr(r11); + r12_oop_opr = as_oop_opr(r12); + r13_oop_opr = as_oop_opr(r13); + r14_oop_opr = as_oop_opr(r14); + r15_oop_opr = as_oop_opr(r15); + + r0_metadata_opr = as_metadata_opr(r0); + r1_metadata_opr = as_metadata_opr(r1); + r2_metadata_opr = as_metadata_opr(r2); + r3_metadata_opr = as_metadata_opr(r3); + r4_metadata_opr = as_metadata_opr(r4); + r5_metadata_opr = as_metadata_opr(r5); + + sp_opr = as_pointer_opr(sp); + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; + SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + rscratch1_opr = as_opr(rscratch1); + rscratch2_opr = as_opr(rscratch2); + rscratch_long_opr = as_long_opr(rscratch1, rscratch2); + + long0_opr = as_long_opr(r0, r1); + long1_opr = as_long_opr(r2, r3); + long2_opr = as_long_opr(r4, r5); + fpu0_float_opr = LIR_OprFact::single_fpu(0); + fpu0_double_opr = LIR_OprFact::double_fpu(0, 1); + + _caller_save_cpu_regs[0] = r0_opr; + _caller_save_cpu_regs[1] = r1_opr; + _caller_save_cpu_regs[2] = r2_opr; + _caller_save_cpu_regs[3] = r3_opr; + _caller_save_cpu_regs[4] = r4_opr; + _caller_save_cpu_regs[5] = r5_opr; + _caller_save_cpu_regs[6] = r6_opr; + _caller_save_cpu_regs[7] = r7_opr; + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + +LIR_Opr FrameMap::stack_pointer() { + return sp_opr; +} + +// TODO: Make sure that neither method handle intrinsics nor compiled lambda +// forms modify sp register (i.e., vmIntrinsics::{_invokeBasic, _linkToVirtual, +// _linkToStatic, _linkToSpecial, _linkToInterface, _compiledLambdaForm}) +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + return LIR_OprFact::illegalOpr; +} + +// Return LIR_Opr corresponding to the given VMRegPair and data type +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to sp-based address. The calling convention does not + // count the SharedRuntime::out_preserve_stack_slots() value, so we must + // add it in here. 
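+    // Illustrative example only (assuming out_preserve_stack_slots() == 0 and
+    // the usual 4-byte VMRegImpl::stack_slot_size): incoming stack slot 3
+    // becomes the operand at sp + 12.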
+ int st_off = + (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * + VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg1 = r_1->as_Register(); + if (type == T_LONG) { + assert(r_2->is_Register(), "wrong VMReg"); + Register reg2 = r_2->as_Register(); + opr = as_long_opr(reg1, reg2); + } else if (type == T_OBJECT || type == T_ARRAY) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); + } else { + opr = as_opr(reg1); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + int num = r_1->as_FloatRegister()->encoding(); + if (type == T_FLOAT) { + opr = LIR_OprFact::single_fpu(num); + } else { + assert(is_even(num) && r_2->as_FloatRegister()->encoding() == (num + 1), + "wrong VMReg"); + opr = LIR_OprFact::double_fpu(num, num + 1); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +// Return VMReg corresponding to the given FPU register number as it is +// encoded in LIR_Opr. The conversion is straightforward because in this +// implementation the encoding of FPU registers in LIR_Opr's is the same as +// in FloatRegister's. +VMReg FrameMap::fpu_regname(int n) { + return as_FloatRegister(n)->as_VMReg(); +} + +// Check that the frame is properly addressable on the platform. The sp-based +// address of every frame slot must have the offset expressible as AArch32's +// imm12 with the separately stored sign. +bool FrameMap::validate_frame() { + int max_offset = in_bytes(framesize_in_bytes()); + int java_index = 0; + for (int i = 0; i < _incoming_arguments->length(); i++) { + LIR_Opr opr = _incoming_arguments->at(i); + if (opr->is_stack()) { + max_offset = MAX2(_argument_locations->at(java_index), max_offset); + } + java_index += type2size[opr->type()]; + } + return Assembler::is_valid_for_offset_imm(max_offset, 12); +} + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + return Address(sp, in_bytes(sp_offset)); +} --- /dev/null 2016-08-26 13:07:50.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_FrameMap_aarch32.hpp 2016-08-26 13:07:50.000000000 +0300 @@ -0,0 +1,174 @@ +/* + * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. 
All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#ifndef CPU_AARCH32_VM_C1_FRAMEMAP_AARCH32_HPP +#define CPU_AARCH32_VM_C1_FRAMEMAP_AARCH32_HPP + +// The following schema visualizes how a C1 frame looks like on AArch32. +// It corresponds to the case of an unextended frame. Each line of text +// represents one 4-byte slot. Every monitor takes two slots. Positions of +// incoming arguments are determined by the Java calling convention. Spill +// area and monitor area are not required to be 8-byte aligned. The slot +// for deoptimization support is used by frame::deoptimize() method to save +// the original pc before patching in the new one. +// +// When LIR_Opr's reference stack slots, they use virtual stack slot indices. +// They are mapped to the real stack slots by FrameMap::sp_offset_for_slot() +// and FrameMap::sp_offset_for_double_slot() methods. The first _argcount +// virtual stack slots correspond to the real stack slots occupied by the +// incoming arguments. Their mapping is defined by _argument_locations array +// (which is filled in by applying the Java calling convention). All other +// virtual stack slots correspond to spill slots. +// +// Higher addresses +// | incoming | virtual stack slots +// | | [0 ... _arg_count - 1] +// | arguments | +// |====================================|----X- 8-byte aligned +// | previous lr | /|\ address +// rfp ===> |------------------------------------| | +// | previous rfp | | +// |====================================| | +// | alignment slot (if needed) | | +// |====================================| | +// | slot for deoptimization support | | +// |====================================| | +// | monitor [_num_monitors - 1] object | | +// | | | +// | monitor [_num_monitors - 1] lock | | +// |------------------------------------| | +// | | | +// Direction of | ... | | _framesize +// stack growth | | | slots +// | |------------------------------------| | +// V | monitor [0] object | | +// | | | +// | monitor [0] lock | | +// |====================================| | +// | spill slot [_num_spills - 1] | | virtual stack slot +// |------------------------------------| | [_arg_count + _num_spills - 1] +// | ... | | ... +// |------------------------------------| | +// | spill slot [0] | | virtual stack slot +// |====================================| | [_arg_count] +// | reserved argument area for | | +// | ... 
| | +// | outgoing calls (8-byte aligned) | \|/ +// sp ===> |====================================|----X- 8-byte aligned +// | | address +// Lower addresses + + public: + enum { + first_available_sp_in_frame = 0, + frame_pad_in_bytes = 8 + }; + + public: + static LIR_Opr r0_opr; + static LIR_Opr r1_opr; + static LIR_Opr r2_opr; + static LIR_Opr r3_opr; + static LIR_Opr r4_opr; + static LIR_Opr r5_opr; + static LIR_Opr r6_opr; + static LIR_Opr r7_opr; + static LIR_Opr r8_opr; + static LIR_Opr r9_opr; + static LIR_Opr r10_opr; + static LIR_Opr r11_opr; + static LIR_Opr r12_opr; + static LIR_Opr r13_opr; + static LIR_Opr r14_opr; + static LIR_Opr r15_opr; + + static LIR_Opr r0_oop_opr; + static LIR_Opr r1_oop_opr; + static LIR_Opr r2_oop_opr; + static LIR_Opr r3_oop_opr; + static LIR_Opr r4_oop_opr; + static LIR_Opr r5_oop_opr; + static LIR_Opr r6_oop_opr; + static LIR_Opr r7_oop_opr; + static LIR_Opr r8_oop_opr; + static LIR_Opr r9_oop_opr; + static LIR_Opr r10_oop_opr; + static LIR_Opr r11_oop_opr; + static LIR_Opr r12_oop_opr; + static LIR_Opr r13_oop_opr; + static LIR_Opr r14_oop_opr; + static LIR_Opr r15_oop_opr; + + static LIR_Opr r0_metadata_opr; + static LIR_Opr r1_metadata_opr; + static LIR_Opr r2_metadata_opr; + static LIR_Opr r3_metadata_opr; + static LIR_Opr r4_metadata_opr; + static LIR_Opr r5_metadata_opr; + + static LIR_Opr sp_opr; + static LIR_Opr receiver_opr; + + static LIR_Opr rscratch1_opr; + static LIR_Opr rscratch2_opr; + static LIR_Opr rscratch_long_opr; + + static LIR_Opr long0_opr; + static LIR_Opr long1_opr; + static LIR_Opr long2_opr; + static LIR_Opr fpu0_float_opr; + static LIR_Opr fpu0_double_opr; + + static LIR_Opr as_long_opr(Register r1, Register r2) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r1), cpu_reg2rnr(r2)); + } + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::single_cpu(cpu_reg2rnr(r)); + } + + static VMReg fpu_regname(int n); + + static bool is_caller_save_register(LIR_Opr opr) { + // On AArch32, unlike on SPARC, we never explicitly request the C1 register + // allocator to allocate a callee-saved register. Since the only place this + // method is called is the assert in LinearScan::color_lir_opr(), we can + // safely just always return true here. + return true; + } + static int nof_caller_save_cpu_regs() { + return pd_nof_caller_save_cpu_regs_frame_map; + } + static int last_cpu_reg() { + return pd_last_cpu_reg; + } + +#endif // CPU_AARCH32_VM_C1_FRAMEMAP_AARCH32_HPP --- /dev/null 2016-08-26 13:07:52.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_LIRAssembler_aarch32.cpp 2016-08-26 13:07:52.000000000 +0300 @@ -0,0 +1,3272 @@ +/* + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "gc_interface/collectedHeap.hpp" +#include "memory/barrierSet.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch32.inline.hpp" + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +NEEDS_CLEANUP // remove this definitions ? +const Register IC_Klass = rscratch2; // where the IC klass is cached +const Register SYNC_header = r0; // synchronization header +const Register SHIFT_count = r0; // where count for shift operations must be + +#define __ _masm-> + + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp2 = extra; + } + assert_different_registers(preserve, tmp1, tmp2); +} + + + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2, + Register &tmp3) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp2 = extra; + } else if (tmp3 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp3 = extra; + } + assert_different_registers(preserve, tmp1, tmp2, tmp3); +} + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + +//--------------fpu register translations----------------------- + + +address LIR_Assembler::float_constant(float f) { + address const_addr = __ float_constant(f); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + + +address LIR_Assembler::double_constant(double d) { + address const_addr = __ double_constant(d); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::int_constant(jlong n) { + address 
const_addr = __ long_constant(n); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } + +void LIR_Assembler::reset_FPU() { Unimplemented(); } + +void LIR_Assembler::fpop() { Unimplemented(); } + +void LIR_Assembler::fxch(int i) { Unimplemented(); } + +void LIR_Assembler::fld(int i) { Unimplemented(); } + +void LIR_Assembler::ffree(int i) { Unimplemented(); } + +void LIR_Assembler::breakpoint() { __ bkpt(0); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } + +//------------------------------------------- + +static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + // as_Address(LIR_Address*, Address::InsnDataType) should be used instead + ShouldNotCallThis(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + // as_Address_hi(LIR_Address*, Address::InsnDataType) should be used instead + ShouldNotCallThis(); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + // as_Address_lo(LIR_Address*, Address::InsnDataType) should be used instead + ShouldNotCallThis(); +} + +Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp, Address::InsnDataType type) { + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + __ mov(tmp, addr->disp()); + return Address(tmp); // encoding is ok for any data type + } + + Register base = addr->base()->as_pointer_register(); + + if (addr->index()->is_illegal()) { + return Address(base, addr->disp()).safe_for(type, _masm, tmp); + } else if (addr->index()->is_cpu_register()) { + assert(addr->disp() == 0, "must be"); + Register index = addr->index()->as_pointer_register(); + return Address(base, index, lsl(addr->scale())).safe_for(type, _masm, tmp); + } else if (addr->index()->is_constant()) { + intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp(); + return Address(base, addr_offset).safe_for(type, _masm, tmp); + } + + Unimplemented(); + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr, Address::InsnDataType type) { + assert(type == Address::IDT_INT, "only to be used for accessing high word of jlong"); + + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + __ mov(rscratch1, addr->disp() + wordSize); + return Address(rscratch1); // encoding is ok for IDR_INT + } + + Register base = addr->base()->as_pointer_register(); + + if (addr->index()->is_illegal()) { + return Address(base, addr->disp() + wordSize).safe_for(Address::IDT_INT, _masm, rscratch1); + } else if (addr->index()->is_cpu_register()) { + assert(addr->disp() == 0, "must be"); + Register index = addr->index()->as_pointer_register(); + __ add(rscratch1, base, wordSize); + return Address(rscratch1, index, lsl(addr->scale())); // encoding is ok for IDT_INT + } else if (addr->index()->is_constant()) { + intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp() + wordSize; + return Address(base, addr_offset).safe_for(Address::IDT_INT, _masm, rscratch1); + } + + Unimplemented(); + return Address(); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr, Address::InsnDataType type) { + return as_Address(addr, rscratch1, type); +} + + 
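+// The helpers above are used throughout this file roughly as sketched below
+// (illustration only; the concrete call sites follow further down). The idea
+// appears to be that safe_for() rewrites the address through the scratch
+// register whenever the displacement does not fit the encoding for the given
+// data type:
+//
+//   Address addr = as_Address(to_addr, Address::toInsnDataType(type));
+//   __ str(src->as_register(), addr);   // 32-bit slot
+//
+//   Address hi = as_Address_hi(to_addr, Address::IDT_INT);
+//   __ str(rscratch2, hi);              // high word of a jlong, disp + wordSize
+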
+void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->state(); + int number_of_locks = entry_state->locks_size(); + + // we jump here if osr happens with the interpreter + // state set up to continue at the beginning of the + // loop that triggered osr - in particular, we have + // the following registers setup: + // + // r1: osr buffer + // + + // build frame + ciMethod* m = compilation()->method(); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[0..number_of_locks] + // + // locals is a direct copy of the interpreter frame so in the osr buffer + // so first slot in the local array is the last local from the interpreter + // and last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // r1: pointer to osr buffer + // + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ldr(rscratch1, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ cbnz(rscratch1, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif + __ ldr(rscratch1, Address(OSR_buf, slot_offset + 0)); + __ str(rscratch1, frame_map()->address_for_monitor_lock(i)); + __ ldr(rscratch1, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ str(rscratch1, frame_map()->address_for_monitor_object(i)); + } + } +} + + +// inline cache check; done before the frame is built. +int LIR_Assembler::check_icache() { + Register receiver = FrameMap::receiver_opr->as_register(); + Register ic_klass = IC_Klass; + int start_offset = __ offset(); + __ inline_cache_check(receiver, ic_klass); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! + Label dont; + __ b(dont, Assembler::EQ); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) { + // force alignment after the cache check. 
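+    // (4 * 4 here is presumably four 4-byte AArch32 instructions, i.e. the
+    // size the inline_cache_check sequence above is expected to occupy.)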
+ __ align(CodeEntryAlignment); + } + + __ bind(dont); + return start_offset; +} + + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ mov(reg, 0); + } else { + __ movoop(reg, o, /*immediate*/true); + } +} + +void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); + add_call_info_here(info); +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + jobject o = NULL; + PatchingStub* patch = new PatchingStub(_masm, patching_id(info)); + __ movoop(reg, o, true); + patching_epilog(patch, lir_patch_normal, reg, info); +} + +// Return sp decrement needed to build a frame +int LIR_Assembler::initial_frame_size_in_bytes() const { + // We need to subtract two words to take into account saved lr and rfp. + return in_bytes(frame_map()->framesize_in_bytes()) - + FrameMap::frame_pad_in_bytes; +} + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(exception_handler_size); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // the exception oop and pc are in r0, and r3 + // no other registers need to be preserved, so invalidate them + __ invalidate_registers(false, true, false); + + // check that there is really an exception + __ verify_not_null_oop(r0); + + // search an exception handler (r0: exception oop, r3: throwing pc) + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); __ should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + + +// Emit the code to remove the frame from the stack in the exception +// unwind path. +int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ str(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(r0); + + // Preform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r1_opr); + stub = new MonitorExitStub(FrameMap::r1_opr, true, 0); + __ unlock_object(r5, r4, r1, *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + __ call_Unimplemented(); +#if 0 + // FIXME check exception_store is not clobbered below! 
+ __ movptr(Address(rsp, 0), rax); + __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding()); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit))); +#endif + } + + // remove the activation and dispatch to the unwind handler + __ block_comment("remove_frame and dispatch to the unwind handler"); + __ remove_frame(initial_frame_size_in_bytes()); + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + + +int LIR_Assembler::emit_deopt_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(deopt_handler_size); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ adr(lr, pc()); + __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + + +// This is the fast version of java.lang.String.compare; it has not +// OSR-entry and therefore, we generate a slow version for OSR's +void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info) { + __ mov(r2, (address)__FUNCTION__); + __ call_Unimplemented(); +} + + +void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { + _masm->code_section()->relocate(adr, relocInfo::poll_type); + int pc_offset = code_offset(); + flush_debug_info(pc_offset); + info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + if (info->exception_handlers() != NULL) { + compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); + } +} + +void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,"); + // Pop the stack before the safepoint code + __ remove_frame(initial_frame_size_in_bytes()); + address polling_page(os::get_polling_page()); + __ read_polling_page(rscratch1, polling_page, relocInfo::poll_return_type); + __ ret(lr); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); + assert(os::is_poll_address(polling_page), "should be"); + __ mov(rscratch1, Address(polling_page, relocInfo::poll_type)); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map + __ read_polling_page(rscratch1, relocInfo::poll_type); + return __ offset(); +} + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + if (from_reg != to_reg) { + __ mov(to_reg, from_reg); + } +} + +void LIR_Assembler::swap_reg(Register a, Register b) { + Unimplemented(); +} + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_INT: { 
+ assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register(), c->as_jint()); + break; + } + + case T_ADDRESS: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register(), c->as_jint()); + break; + } + + case T_LONG: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register_lo(), c->as_jint_lo()); + __ mov(dest->as_register_hi(), c->as_jint_hi()); + break; + } + + case T_OBJECT: { + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), dest->as_register()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + } + + case T_METADATA: { + if (patch_code != lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } + break; + } + + case T_FLOAT: { +#ifdef __ARM_PCS_VFP + if (__ operand_valid_for_float_immediate(c->as_jfloat())) { + __ vmov_f32(dest->as_float_reg(), c->as_jfloat()); + } else { + __ lea(rscratch1, InternalAddress(float_constant(c->as_jfloat()))); + __ vldr_f32(dest->as_float_reg(), Address(rscratch1)); + } +#else +#error "unimplemented" +#endif + break; + } + + case T_DOUBLE: { +#ifdef __ARM_PCS_VFP + if (__ operand_valid_for_double_immediate(c->as_jdouble())) { + __ vmov_f64(dest->as_double_reg(), c->as_jdouble()); + } else { + __ lea(rscratch1, InternalAddress(double_constant(c->as_jdouble()))); + __ vldr_f64(dest->as_double_reg(), Address(rscratch1)); + } +#else +#error "unimplemented" +#endif + break; + } + + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + LIR_Const* c = src->as_constant_ptr(); + switch (c->type()) { + case T_OBJECT: + { + if (! c->as_jobject()) { + __ mov(rscratch1, 0); + __ str(rscratch1, frame_map()->address_for_slot(dest->single_stack_ix())); + } else { + const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); + } + } + break; + case T_ADDRESS: + { + const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); + } + case T_INT: + case T_FLOAT: + { + __ mov(rscratch1, c->as_jint_bits()); + __ str(rscratch1, frame_map()->address_for_slot(dest->single_stack_ix())); + } + break; + case T_LONG: + case T_DOUBLE: + { + __ mov(rscratch1, c->as_jint_lo()); + __ str(rscratch1, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + if (c->as_jint_lo() != c->as_jint_hi()) + __ mov(rscratch1, c->as_jint_hi()); + __ str(rscratch1, frame_map()->address_for_slot(dest->double_stack_ix(), + hi_word_offset_in_bytes)); + } + break; + default: + ShouldNotReachHere(); + } +} + +/* + * For now this code can load only zero constants as in aarch32. + * It seems like this implementation can break some tests in future. + * TODO: ensure, write test, and rewrite if need. 
+ */ +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* to_addr = dest->as_address_ptr(); + + void (Assembler::* insn)(Register Rt, const Address &adr, Assembler::Condition cnd); + + __ mov(rscratch2, 0); + + int null_check_here = code_offset(); + + Address::InsnDataType idt = Address::toInsnDataType(type); + switch (type) { + case T_ADDRESS: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::str; + break; + case T_LONG: { + assert(c->as_jlong() == 0, "should be"); + insn = &Assembler::str; + Address addr = as_Address_hi(to_addr, Address::IDT_INT); + null_check_here = code_offset(); + __ str(rscratch2, addr); + idt = Address::IDT_INT; + break; + } + case T_INT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::str; + break; + case T_OBJECT: + case T_ARRAY: + assert(c->as_jobject() == 0, "should be"); + insn = &Assembler::str; + break; + case T_CHAR: + case T_SHORT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::strh; + break; + case T_BOOLEAN: + case T_BYTE: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::strb; + break; + default: + ShouldNotReachHere(); + } + + (_masm->*insn)(rscratch2, as_Address(to_addr, idt), Assembler::C_DFLT); + if (info) add_debug_info_for_null_check(null_check_here, info); +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + // move between cpu-registers + if (dest->is_single_cpu()) { + if (src->type() == T_LONG) { + // Can do LONG -> OBJECT + __ stop("investigate how \"LONG -> OBJECT\" works especially when high part is != 0"); + move_regs(src->as_register_lo(), dest->as_register()); + return; + } + assert(src->is_single_cpu(), "must match"); + if (src->type() == T_OBJECT) { + __ verify_oop(src->as_register()); + } + move_regs(src->as_register(), dest->as_register()); + + } else if (dest->is_double_cpu()) { + if (src->type() == T_OBJECT || src->type() == T_ARRAY) { + // Surprising to me but we can see move of a long to t_object + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + __ mov(dest->as_register_hi(), 0); + return; + } + assert(src->is_double_cpu(), "must match"); + Register f_lo = src->as_register_lo(); + Register f_hi = src->as_register_hi(); + Register t_lo = dest->as_register_lo(); + Register t_hi = dest->as_register_hi(); + assert(f_hi != f_lo, "must be different"); + assert(t_hi != t_lo, "must be different"); + check_register_collision(t_lo, &f_hi); + move_regs(f_lo, t_lo); + move_regs(f_hi, t_hi); + } else if (dest->is_single_fpu()) { + __ vmov_f32(dest->as_float_reg(), src->as_float_reg()); + + } else if (dest->is_double_fpu()) { + __ vmov_f64(dest->as_double_reg(), src->as_double_reg()); + + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + if (src->is_single_cpu()) { + if (type == T_ARRAY || type == T_OBJECT) { + __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); + __ verify_oop(src->as_register()); + } else { + __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); + } + + } else if (src->is_double_cpu()) { + Address dest_addr_LO = frame_map()->address_for_slot(dest->double_stack_ix(), 
lo_word_offset_in_bytes); + __ strd(src->as_register_lo(), src->as_register_hi(), dest_addr_LO); + + } else if (src->is_single_fpu()) { + Address dest_addr = frame_map()->address_for_slot(dest->single_stack_ix()); +#ifdef __ARM_PCS_VFP + __ vstr_f32(src->as_float_reg(), dest_addr.safe_for(Address::IDT_FLOAT, _masm, rscratch1)); +#else +#error "unimplemented" +#endif + } else if (src->is_double_fpu()) { + Address dest_addr = frame_map()->address_for_slot(dest->double_stack_ix()); +#ifdef __ARM_PCS_VFP + __ vstr_f64(src->as_double_reg(), dest_addr.safe_for(Address::IDT_DOUBLE, _masm, rscratch1)); +#else +#error "unimplemented" +#endif + } else { + ShouldNotReachHere(); + } + +} + + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + + if (type == T_ARRAY || type == T_OBJECT) { + __ verify_oop(src->as_register()); + } + + PatchingStub* patch = NULL; + if (patch_code != lir_patch_none) { + assert(to_addr->disp() != 0, "must have"); + + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + __ mov(rscratch1, (address) to_addr->disp()); + patching_epilog(patch, patch_code, to_addr->base()->as_register(), info); + + to_addr = new LIR_Address(to_addr->base(), FrameMap::rscratch1_opr, to_addr->type()); + } + + + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: { +#ifdef __ARM_PCS_VFP + Address addr = as_Address(to_addr, Address::IDT_FLOAT); + null_check_here = code_offset(); + __ vstr_f32(src->as_float_reg(), addr); +#else +#error "unimplemented" +#endif + break; + } + + case T_DOUBLE: { +#ifdef __ARM_PCS_VFP + Address addr = as_Address(to_addr, Address::IDT_DOUBLE); + null_check_here = code_offset(); + __ vstr_f64(src->as_double_reg(), addr); +#else +#error "unimplemented" +#endif + + break; + } + + case T_ARRAY: // fall through + case T_OBJECT: // fall through + case T_ADDRESS: // fall though + case T_INT: { + Address addr = as_Address(to_addr, Address::toInsnDataType(type)); + null_check_here = code_offset(); + __ str(src->as_register(), addr); + break; + } + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. 
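+      // (The comment above is apparently carried over from a 64-bit port; on
+      // AArch32 this case is simply never expected to be reached.)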
+ ShouldNotReachHere(); +// __ str(src->as_register(), as_Address(to_addr)); + break; + + case T_LONG: { + Address addr = as_Address_lo(to_addr, Address::IDT_LONG); + null_check_here = code_offset(); + null_check_here += __ strd(src->as_register_lo(), src->as_register_hi(), addr); + break; + } + + case T_BYTE: // fall through + case T_BOOLEAN: { + Address addr = as_Address(to_addr, Address::toInsnDataType(type)); + null_check_here = code_offset(); + __ strb(src->as_register(), addr); + break; + } + case T_CHAR: // fall through + case T_SHORT: { + Address addr = as_Address(to_addr, Address::toInsnDataType(type)); + null_check_here = code_offset(); + __ strh(src->as_register(), addr); + break; + } + default: + ShouldNotReachHere(); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + assert(src->is_stack(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + if (dest->is_single_cpu()) { + if (type == T_ARRAY || type == T_OBJECT) { + __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); + __ verify_oop(dest->as_register()); + } else { + __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); + } + + } else if (dest->is_double_cpu()) { + Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes); + __ ldrd(dest->as_register_lo(), dest->as_register_hi(), src_addr_LO); + + } else if (dest->is_single_fpu()) { +#ifdef __ARM_PCS_VFP + Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); + __ vldr_f32(dest->as_float_reg(), src_addr.safe_for(Address::IDT_FLOAT, _masm, rscratch1)); +#else +#error "unimplemented" +#endif + } else if (dest->is_double_fpu()) { +#ifdef __ARM_PCS_VFP + Address src_addr = frame_map()->address_for_slot(src->double_stack_ix()); + __ vldr_f64(dest->as_double_reg(), src_addr.safe_for(Address::IDT_DOUBLE, _masm, rscratch1)); +#else +#error "unimplemented" +#endif + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + Metadata* o = NULL; + PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id); + __ mov_metadata(reg, o); + patching_epilog(patch, lir_patch_normal, reg, info); +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + + LIR_Opr temp; + if (type == T_LONG || type == T_DOUBLE) + temp = FrameMap::rscratch_long_opr; + else + temp = FrameMap::rscratch1_opr; + + stack2reg(src, temp, src->type()); + reg2stack(temp, dest, dest->type(), false); +} + + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + LIR_Address* from_addr = src->as_address_ptr(); + + if (from_addr->base()->type() == T_OBJECT) { + __ verify_oop(from_addr->base()->as_pointer_register()); + } + + PatchingStub* patch = NULL; + if (patch_code != lir_patch_none) { + assert(from_addr->disp() != 0, "must have"); + + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + __ mov(rscratch1, (address) from_addr->disp()); + patching_epilog(patch, patch_code, from_addr->base()->as_register(), info); + + from_addr = new LIR_Address(from_addr->base(), FrameMap::rscratch1_opr, from_addr->type()); + } + + int null_check_here = code_offset(); + + switch (type) { + case T_FLOAT: { +#ifdef __ARM_PCS_VFP + Address addr 
= as_Address(from_addr, Address::IDT_FLOAT); + null_check_here = code_offset(); + __ vldr_f32(dest->as_float_reg(), addr); +#else +#error "unimplemented" +#endif + break; + } + + case T_DOUBLE: { +#ifdef __ARM_PCS_VFP + Address addr = as_Address(from_addr, Address::IDT_DOUBLE); + null_check_here = code_offset(); + __ vldr_f64(dest->as_double_reg(), addr); +#else +#error "unimplemented" +#endif + break; + } + + case T_ARRAY: // fall through + case T_OBJECT: // fall through + case T_ADDRESS: // fall through + case T_INT: { + Address addr = as_Address(from_addr, Address::toInsnDataType(type)); + null_check_here = code_offset(); + __ ldr(dest->as_register(), addr); + break; + } + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); +// __ ldr(dest->as_register(), as_Address(from_addr)); + break; + + case T_LONG: { + Address addr = as_Address_lo(from_addr, Address::IDT_LONG); + null_check_here = code_offset(); + null_check_here += __ ldrd(dest->as_register_lo(), dest->as_register_hi(), addr); + break; + } + + case T_BYTE: { + Address addr = as_Address(from_addr, Address::IDT_BYTE); + null_check_here = code_offset(); + __ ldrsb(dest->as_register(), addr); + break; + } + case T_BOOLEAN: { + Address addr = as_Address(from_addr, Address::IDT_BOOLEAN); + null_check_here = code_offset(); + __ ldrb(dest->as_register(), addr); + break; + } + + case T_CHAR: { + Address addr = as_Address(from_addr, Address::IDT_CHAR); + null_check_here = code_offset(); + __ ldrh(dest->as_register(), addr); + break; + } + case T_SHORT: { + Address addr = as_Address(from_addr, Address::IDT_SHORT); + null_check_here = code_offset(); + __ ldrsh(dest->as_register(), addr); + break; + } + + default: + ShouldNotReachHere(); + } + + if (type == T_ARRAY || type == T_OBJECT) { + __ verify_oop(dest->as_register()); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + +void LIR_Assembler::prefetchr(LIR_Opr src) { + Unimplemented(); +} + +void LIR_Assembler::prefetchw(LIR_Opr src) { + Unimplemented(); +} + +int LIR_Assembler::array_element_size(BasicType type) const { + int elem_size = type2aelembytes(type); + return exact_log2(elem_size); +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + Register Rdividend = op->in_opr1()->as_register(); + Register Rdivisor = op->in_opr2()->as_register(); + Register Rscratch = op->in_opr3()->as_register(); + Register Rresult = op->result_opr()->as_register(); + int divisor = -1; + + /* + TODO: For some reason, using the Rscratch that gets passed in is + not possible because the register allocator does not see the tmp reg + as used, and assignes it the same register as Rdividend. We use rscratch1 + instead. 
+ + assert(Rdividend != Rscratch, ""); + assert(Rdivisor != Rscratch, ""); + */ + + if (Rdivisor == noreg && is_power_of_2(divisor)) { + // convert division by a power of two into some shifts and logical operations + } + + assert(op->code() == lir_irem || op->code() == lir_idiv, "should be irem or idiv"); + bool want_remainder = op->code() == lir_irem; + + __ divide(Rresult, Rdividend, Rdivisor, 32, want_remainder); +} + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); +#endif + + if (op->cond() == lir_cond_always) { + if (op->info() != NULL) add_debug_info_for_branch(op->info()); + __ b(*(op->label())); + } else { + Assembler::Condition acond; + if (op->code() == lir_cond_float_branch) { + bool is_unordered = (op->ublock() == op->block()); + // Assembler::EQ does not permit unordered branches, so we add + // another branch here. Likewise, Assembler::NE does not permit + // ordered branches. + if (is_unordered && op->cond() == lir_cond_equal + || !is_unordered && op->cond() == lir_cond_notEqual) + __ b(*(op->ublock()->label()), Assembler::VS); + switch(op->cond()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = (is_unordered ? Assembler::LT : Assembler::LO); break; + case lir_cond_lessEqual: acond = (is_unordered ? Assembler::LE : Assembler::LS); break; + case lir_cond_greaterEqual: acond = (is_unordered ? Assembler::HS : Assembler::GE); break; + case lir_cond_greater: acond = (is_unordered ? Assembler::HI : Assembler::GT); break; + default: ShouldNotReachHere(); + } + } else { + switch (op->cond()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = Assembler::LT; break; + case lir_cond_greaterEqual: acond = Assembler::GE; break; + case lir_cond_lessEqual: acond = Assembler::LE; break; + case lir_cond_greater: acond = Assembler::GT; break; + case lir_cond_belowEqual: acond = Assembler::LS; break; + case lir_cond_aboveEqual: acond = Assembler::HS; break; + default: ShouldNotReachHere(); + } + if (op->type() == T_LONG) { + // a special trick here to be able to effectively compare jlongs + // for the lessEqual and greater conditions the jlong operands are swapped + // during comparison and hence should use mirror condition in conditional + // instruction + // see LIR_Assembler::comp_op and LIR_Assembler::cmove + switch (op->cond()) { + case lir_cond_lessEqual: acond = Assembler::GE; break; + case lir_cond_greater: acond = Assembler::LT; break; + } + } + } + __ b(*(op->label()), acond); + } +} + +FloatRegister LIR_Assembler::as_float_reg(LIR_Opr doubleReg) { + assert(doubleReg->is_double_fpu(), "must be f64"); + return as_FloatRegister(doubleReg->fpu_regnrLo()); +} + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + + switch (op->bytecode()) { + case Bytecodes::_i2f: + { + __ vmov_f32(dest->as_float_reg(), src->as_register()); + __ vcvt_f32_s32(dest->as_float_reg(), dest->as_float_reg()); + break; + } + case Bytecodes::_i2d: + { + __ vmov_f32(as_float_reg(dest), src->as_register()); + __ vcvt_f64_s32(dest->as_double_reg(), as_float_reg(dest)); + break; + } + case Bytecodes::_f2d: + { + __ 
vcvt_f64_f32(dest->as_double_reg(), src->as_float_reg()); + break; + } + case Bytecodes::_d2f: + { + __ vcvt_f32_f64(dest->as_float_reg(), src->as_double_reg()); + break; + } + case Bytecodes::_i2c: + { + __ uxth(dest->as_register(), src->as_register()); + break; + } + case Bytecodes::_i2l: + { + const Register dst_hi = dest->as_register_hi(); + const Register dst_lo = dest->as_register_lo(); + const Register src_lo = as_reg(src); + __ mov(dst_lo, src_lo); + __ asr(dst_hi, src_lo, 31); + break; + } + case Bytecodes::_i2s: + { + __ sxth(dest->as_register(), src->as_register()); + break; + } + case Bytecodes::_i2b: + { + __ sxtb(dest->as_register(), src->as_register()); + break; + } + case Bytecodes::_l2i: + { + assert(dest->is_single_cpu(), "must be single register"); + __ mov(dest->as_register(), src->as_register_lo()); + break; + } + case Bytecodes::_f2i: + { + __ vcvt_s32_f32(src->as_float_reg(), src->as_float_reg()); + __ vmov_f32(dest->as_register(), src->as_float_reg()); + break; + } + case Bytecodes::_d2i: + { + __ vcvt_s32_f64(as_float_reg(src), src->as_double_reg()); + __ vmov_f32(dest->as_register(), as_float_reg(src)); + break; + } + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + __ ldrb(rscratch1, Address(op->klass()->as_register(), + InstanceKlass::init_state_offset())); + __ cmp(rscratch1, InstanceKlass::fully_initialized); + add_debug_info_for_null_check_here(op->stub()->info()); + __ b(*op->stub()->entry(), Assembler::NE); + } + __ allocate_object(op->obj()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->header_size(), + op->object_size(), + op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + Register len = as_reg(op->len()); + + if (UseSlowPath || + (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || + (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { + __ b(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + Register tmp3 = op->tmp3()->as_register(); + if (len == tmp1) { + tmp1 = tmp3; + } else if (len == tmp2) { + tmp2 = tmp3; + } else if (len == tmp3) { + // everything is ok + } else { + __ mov(tmp3, len); + } + __ allocate_array(op->obj()->as_register(), + len, + tmp1, + tmp2, + arrayOopDesc::header_size(op->type()), + array_element_size(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done) { + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. 
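+    // Each ReceiverTypeData row is a (receiver klass, counter) pair: this loop
+    // bumps the counter of a matching row, and the loop below claims the first
+    // empty row for a receiver klass that has not been recorded yet.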
+ __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ ldr(rscratch1, Address(rscratch2)); + __ cmp(recv, rscratch1); + __ b(next_test, Assembler::NE); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + __ b(*update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + __ lea(rscratch2, + Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + Address recv_addr(rscratch2); + __ ldr(rscratch1, recv_addr); + __ cbnz(rscratch1, next_test); + __ str(recv, recv_addr); + __ mov(rscratch1, DataLayout::counter_increment); + __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ str(rscratch1, Address(rscratch2)); + __ b(*update_done); + __ bind(next_test); + } +} + +void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { + // we always need a stub for the failure case. + CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register dst = op->result_opr()->as_register(); + ciKlass* k = op->klass(); + Register Rtmp1 = noreg; + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + if (op->should_profile()) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure; + Label *success_target = op->should_profile() ? &profile_cast_success : success; + Label *failure_target = op->should_profile() ? 
&profile_cast_failure : failure; + + if (obj == k_RInfo) { + k_RInfo = dst; + } else if (obj == klass_RInfo) { + klass_RInfo = dst; + } + if (k->is_loaded()) { + select_different_registers(obj, dst, k_RInfo, klass_RInfo); + } else { + Rtmp1 = op->tmp3()->as_register(); + select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); + } + + assert_different_registers(obj, k_RInfo, klass_RInfo); + + if (op->should_profile()) { + Label not_null; + __ cbnz(obj, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::DataLayout::header_offset()), + LogBytesPerWord); + int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant()); + __ ldr(rscratch1, data_addr); + __ orr(rscratch1, rscratch1, header_bits); + __ str(rscratch1, data_addr); + __ b(*obj_is_null); + __ bind(not_null); + } else { + __ cbz(obj, *obj_is_null); + } + + if (!k->is_loaded()) { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } else { + __ mov_metadata(k_RInfo, k->constant_encoding()); + } + __ verify_oop(obj); + + if (op->fast_check()) { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(rscratch1, obj); + __ cmp( rscratch1, k_RInfo); + + __ b(*failure_target, Assembler::NE); + // successful cast, fall through to profile or jump + } else { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit + __ ldr(rscratch1, Address(klass_RInfo, long(k->super_check_offset()))); + __ cmp(k_RInfo, rscratch1); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ b(*failure_target, Assembler::NE); + // successful cast, fall through to profile or jump + } else { + // See if we get an immediate positive hit + __ b(*success_target, Assembler::EQ); + // check for self + __ cmp(klass_RInfo, k_RInfo); + __ b(*success_target, Assembler::EQ); + + __ push(klass_RInfo); + __ push(k_RInfo); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize))); + + // result is a boolean + __ cbz(klass_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } else { + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ push(klass_RInfo); + __ push(k_RInfo); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldr(k_RInfo, Address(__ post(sp, 2 * wordSize))); + + // result is a boolean + __ cbz(k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, obj); + Label update_done; + type_profile_helper(mdo, md, data, recv, success); + __ b(*success); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, CounterData::count_offset()), + LogBytesPerWord); + __ ldr(rscratch1, counter_addr); + __ 
sub(rscratch1, rscratch1, DataLayout::counter_increment); + __ str(rscratch1, counter_addr); + __ b(*failure); + } + __ b(*success); +} + + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + LIR_Code code = op->code(); + if (code == lir_store_check) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + + CodeStub* stub = op->stub(); + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + if (op->should_profile()) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = op->should_profile() ? &profile_cast_success : &done; + Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry(); + + if (op->should_profile()) { + Label not_null; + __ cbnz(value, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::header_offset()), + LogBytesPerInt); + int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant()); + __ ldr(rscratch1, data_addr); + __ orr(rscratch1, rscratch1, header_bits); + __ str(rscratch1, data_addr); + __ b(done); + __ bind(not_null); + } else { + __ cbz(value, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + // get instance klass (it's already uncompressed) + __ ldr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ push(klass_RInfo); + __ push(k_RInfo); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldr(k_RInfo, Address(__ post(sp, 2 * wordSize))); + // result is a boolean + __ cbz(k_RInfo, *failure_target); + // fall through to the success case + + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, value); + type_profile_helper(mdo, md, data, recv, &done); + __ b(done); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ lea(rscratch2, counter_addr); + __ ldr(rscratch1, Address(rscratch2)); + __ sub(rscratch1, rscratch1, DataLayout::counter_increment); + __ str(rscratch1, Address(rscratch2)); + __ b(*stub->entry()); + } + + __ bind(done); + } else if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + if (dst != obj) { + __ mov(dst, 
obj);
+    }
+  } else if (code == lir_instanceof) {
+    Register obj = op->object()->as_register();
+    Register dst = op->result_opr()->as_register();
+    Label success, failure, done;
+    emit_typecheck_helper(op, &success, &failure, &failure);
+    __ bind(failure);
+    __ mov(dst, 0);
+    __ b(done);
+    __ bind(success);
+    __ mov(dst, 1);
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// TODO: reuse masm cmpxchgw
+void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, Register result) {
+  assert(newval != cmpval, "must be different");
+  Label retry_load, nope;
+  // load exclusive from the memory location
+  // and fail if it is not what we expect
+  __ bind(retry_load);
+  __ ldrex(result, addr);
+  __ cmp(result, cmpval);
+  __ mov(result, 1, Assembler::NE);
+  __ b(nope, Assembler::NE);
+  // if the store-exclusive succeeds with no intervening write, result will be zero
+  __ strex(result, newval, addr);
+  // retry if the store-exclusive failed; we only fall through after a successful
+  // store, which ensures we never return a stale value after a failed write.
+  __ cbnz(result, retry_load);
+  __ membar(__ AnyAny);
+  __ bind(nope);
+}
+
+void LIR_Assembler::casl(Register addr, Register newval_lo, Register newval_hi, Register cmpval_lo, Register cmpval_hi, Register tmp_lo, Register tmp_hi, Register result) {
+  assert(newval_lo->successor() == newval_hi, "must be contiguous");
+  assert(tmp_lo->successor() == tmp_hi, "must be contiguous");
+  assert(tmp_lo->encoding_nocheck() % 2 == 0, "Must be an even register");
+  assert_different_registers(newval_lo, newval_hi, cmpval_lo, cmpval_hi, tmp_lo, tmp_hi);
+
+  Label retry_load, nope;
+  // load exclusive from the memory location
+  // and fail if it is not what we expect
+  __ bind(retry_load);
+  __ mov(result, 1);
+  __ ldrexd(tmp_lo, addr);
+  __ cmp(tmp_lo, cmpval_lo);
+  __ b(nope, Assembler::NE);
+  __ cmp(tmp_hi, cmpval_hi);
+  __ b(nope, Assembler::NE);
+  // if the store-exclusive succeeds with no intervening write, result will be zero
+  __ strexd(result, newval_lo, addr);
+  // retry if the store-exclusive failed; we only fall through after a successful
+  // store, which ensures we never return a stale value after a failed write.
+ __ cbnz(result, retry_load); + __ membar(__ AnyAny); + __ bind(nope); +} + + +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + Register addr = as_reg(op->addr()); + Register result = as_reg(op->result_opr()); + if (op->code() == lir_cas_obj || op->code() == lir_cas_int) { + Register newval = as_reg(op->new_value()); + Register cmpval = as_reg(op->cmp_value()); + casw(addr, newval, cmpval, result); + } else if (op->code() == lir_cas_long){ + Register newval_lo = op->new_value()->as_register_lo(); + Register newval_hi = op->new_value()->as_register_hi(); + Register cmpval_lo = op->cmp_value()->as_register_lo(); + Register cmpval_hi = op->cmp_value()->as_register_hi(); + Register tmp_lo = op->tmp1()->as_register_lo(); + Register tmp_hi = op->tmp1()->as_register_hi(); + casl(addr, newval_lo, newval_hi, cmpval_lo, cmpval_hi, tmp_lo, tmp_hi, result); + } else { + ShouldNotReachHere(); + } +} + +static void patch_condition(address start_insn, address end_insn, Assembler::Condition cond) { + for (uint32_t* insn_p = (uint32_t*) start_insn; (address) insn_p < end_insn; ++insn_p) { + uint32_t insn = *insn_p; + assert((insn >> 28) == Assembler::AL, "instructions in patch" + " should allow conditional form and be in ALWAYS condition"); + *insn_p = (insn & 0x0fffffff) | (cond << 28); + } +} + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { + + Assembler::Condition acond, ncond; + switch (condition) { + case lir_cond_equal: acond = Assembler::EQ; ncond = Assembler::NE; break; + case lir_cond_notEqual: acond = Assembler::NE; ncond = Assembler::EQ; break; + case lir_cond_less: acond = Assembler::LT; ncond = Assembler::GE; break; + case lir_cond_greaterEqual: acond = Assembler::GE; ncond = Assembler::LT; break; + case lir_cond_lessEqual: acond = Assembler::LE; ncond = Assembler::GT; break; + case lir_cond_greater: acond = Assembler::GT; ncond = Assembler::LE; break; + case lir_cond_belowEqual: Unimplemented(); break; + case lir_cond_aboveEqual: Unimplemented(); break; + default: ShouldNotReachHere(); + } + if (type == T_LONG) { + // for the lessEqual and greater conditions the jlong operands are swapped + // during comparison and hence should use mirror condition in conditional + // instruction. 
see comp_op()) + switch (condition) { + case lir_cond_lessEqual: acond = Assembler::GE; ncond = Assembler::LT; break; + case lir_cond_greater: acond = Assembler::LT; ncond = Assembler::GE; break; + } + } + + address true_instrs = __ pc(); + if (opr1->is_cpu_register()) { + reg2reg(opr1, result); + } else if (opr1->is_stack()) { + stack2reg(opr1, result, result->type()); + } else if (opr1->is_constant()) { + const2reg(opr1, result, lir_patch_none, NULL); + } else { + ShouldNotReachHere(); + } + patch_condition(true_instrs, __ pc(), acond); + + address false_instrs = __ pc(); + if (opr2->is_cpu_register()) { + reg2reg(opr2, result); + } else if (opr2->is_stack()) { + stack2reg(opr2, result, result->type()); + } else if (opr2->is_constant()) { + const2reg(opr2, result, lir_patch_none, NULL); + } else { + ShouldNotReachHere(); + } + patch_condition(false_instrs, __ pc(), ncond); +} + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + + if (left->is_single_cpu()) { + Register lreg = left->as_register(); + Register dreg = as_reg(dest); + + if (right->is_single_cpu()) { + // cpu register - cpu register + + assert((left->type() == T_INT || left->type() == T_OBJECT) + && right->type() == T_INT + && dest->type() == T_INT, + "should be"); + Register rreg = right->as_register(); + switch (code) { + case lir_add: __ add (dest->as_register(), lreg, rreg); break; + case lir_sub: __ sub (dest->as_register(), lreg, rreg); break; + case lir_mul: __ mul (dest->as_register(), lreg, rreg); break; + default: ShouldNotReachHere(); + } + + } else if (right->is_double_cpu()) { + ShouldNotReachHere(); // for obj+long op the generator casts long to int before invoking add + } else if (right->is_constant()) { + // cpu register - constant + jint c = right->as_constant_ptr()->as_jint(); + + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c == 0 && dreg == lreg) { + COMMENT("effective nop elided"); + return; + } + + if (Assembler::operand_valid_for_add_sub_immediate(c)) { + switch (code) { + case lir_add: __ add(dreg, lreg, c); break; + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } + } else { + __ mov(rscratch1, c); + switch (code) { + case lir_add: __ add(dreg, lreg, rscratch1); break; + case lir_sub: __ sub(dreg, lreg, rscratch1); break; + default: ShouldNotReachHere(); + } + } + } else { + ShouldNotReachHere(); + } + + } else if (left->is_double_cpu()) { + Register lreg_lo = left->as_register_lo(); + Register lreg_hi = left->as_register_hi(); + + if (right->is_double_cpu()) { + // cpu register - cpu register + Register rreg_lo = right->as_register_lo(); + Register rreg_hi = right->as_register_hi(); + Register dreg_lo = dest->as_register_lo(); + Register dreg_hi = dest->as_register_hi(); + if (code == lir_add || code == lir_sub) { + check_register_collision(dreg_lo, &lreg_hi, &rreg_hi); + } + switch (code) { + case lir_add: __ adds (dreg_lo, lreg_lo, rreg_lo); + __ adc (dreg_hi, lreg_hi, rreg_hi); break; + case lir_sub: __ subs (dreg_lo, lreg_lo, rreg_lo); + __ sbc (dreg_hi, lreg_hi, rreg_hi); break; + case lir_mul: __ mult_long (dreg_lo, dreg_hi, + lreg_lo, lreg_hi, rreg_lo, rreg_hi); break; + default: + ShouldNotReachHere(); + } + + } else if (right->is_constant()) { + const jint c_lo = right->as_constant_ptr()->as_jint_lo_bits(); + const jint c_hi = 
right->as_constant_ptr()->as_jint_hi_bits(); + const Register dreg_lo = dest->as_register_lo(); + const Register dreg_hi = dest->as_register_hi(); + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c_lo == 0 && c_hi == 0 && dreg_lo == lreg_lo && dreg_hi == lreg_hi) { + COMMENT("effective nop elided"); + return; + } + check_register_collision(dreg_lo, &lreg_hi, NULL, rscratch2); + switch (code) { + case lir_add: + if (Assembler::operand_valid_for_add_sub_immediate(c_lo)) + __ adds(dreg_lo, lreg_lo, c_lo); + else { + __ mov(rscratch1, c_lo); + __ adds(dreg_lo, lreg_lo, rscratch1); + } + if (Assembler::operand_valid_for_add_sub_immediate(c_hi)) + __ adc(dreg_hi, lreg_hi, c_hi); + else { + __ mov(rscratch1, c_hi); + __ adc(dreg_lo, lreg_hi, rscratch1); + } + break; + case lir_sub: + if (Assembler::operand_valid_for_add_sub_immediate(c_lo)) + __ subs(dreg_lo, lreg_lo, c_lo); + else { + __ mov(rscratch1, c_lo); + __ subs(dreg_lo, lreg_lo, rscratch1); + } + if (Assembler::operand_valid_for_add_sub_immediate(c_hi)) + __ sbc(dreg_hi, lreg_hi, c_hi); + else { + __ mov(rscratch1, c_hi); + __ sbc(dreg_hi, lreg_hi, rscratch1); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + } else if (left->is_single_fpu()) { + assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); + switch (code) { + case lir_add: __ vadd_f32 (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ vsub_f32 (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_mul: __ vmul_f32 (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_div: __ vdiv_f32 (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); + } + } else if (left->is_double_fpu()) { + if (right->is_double_fpu()) { + // cpu register - cpu register + switch (code) { + case lir_add: __ vadd_f64 (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ vsub_f64 (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_mul: __ vmul_f64 (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_div: __ vdiv_f64 (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); + } + } else { + if (right->is_constant()) { + ShouldNotReachHere(); + } + ShouldNotReachHere(); + } + } else if (left->is_single_stack() || left->is_address()) { + assert(left == dest, "left and dest must be equal"); + ShouldNotReachHere(); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { + switch(code) { + case lir_abs : __ vabs_f64(dest->as_double_reg(), value->as_double_reg()); break; + case lir_sqrt: __ vsqrt_f64(dest->as_double_reg(), value->as_double_reg()); break; + default : ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { + + assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); + Register Rleft = left->is_single_cpu() ? 
left->as_register() : + left->as_register_lo(); + if (dst->is_single_cpu()) { + Register Rdst = dst->as_register(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: __ andr (Rdst, Rleft, right->as_jint()); break; + case lir_logic_or: __ orr (Rdst, Rleft, right->as_jint()); break; + case lir_logic_xor: __ eor (Rdst, Rleft, right->as_jint()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : + right->as_register_lo(); + switch (code) { + case lir_logic_and: __ andr (Rdst, Rleft, Rright); break; + case lir_logic_or: __ orr (Rdst, Rleft, Rright); break; + case lir_logic_xor: __ eor (Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } else { + assert(dst->is_double_cpu(), "mismatched logic op operand size"); + const Register Rdst_lo = dst->as_register_lo(); + const Register Rdst_hi = dst->as_register_hi(); + Register Rleft_hi = left->as_register_hi(); + if (right->is_constant()) { + // LIR generator enforces jlong constants to be valid_immediate12 + // so we know they fit into 32-bit int + switch (code) { + case lir_logic_and: __ andr (Rdst_lo, Rleft, (int)right->as_jlong()); break; + case lir_logic_or: __ orr (Rdst_lo, Rleft, (int)right->as_jlong()); break; + case lir_logic_xor: __ eor (Rdst_lo, Rleft, (int)right->as_jlong()); break; + default: ShouldNotReachHere(); break; + } + } else { + assert(right->is_double_cpu(), "mismatched logic op operand size"); + Register Rright_lo = right->as_register_lo(); + Register Rright_hi = right->as_register_hi(); + check_register_collision(Rdst_lo, &Rleft_hi, &Rright_hi); + switch (code) { + case lir_logic_and: __ andr (Rdst_lo, Rleft, Rright_lo); + __ andr (Rdst_hi, Rleft_hi, Rright_hi); break; + case lir_logic_or: __ orr (Rdst_lo, Rleft, Rright_lo); + __ orr (Rdst_hi, Rleft_hi, Rright_hi); break; + case lir_logic_xor: __ eor (Rdst_lo, Rleft, Rright_lo); + __ eor (Rdst_hi, Rleft_hi, Rright_hi); break; + default: ShouldNotReachHere(); break; + } + } + } +} + + + +void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) { Unimplemented(); } + +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { + if (opr1->is_single_cpu()) { + Register reg1 = as_reg(opr1); + if (opr2->is_single_cpu()) { + // cpu register - cpu register + Register reg2 = opr2->as_register(); + __ cmp(reg1, reg2); + } else if (opr2->is_constant()) { + LIR_Const* c = opr2->as_constant_ptr(); + if (c->type() == T_INT) { + __ cmp(reg1, c->as_jint(), rscratch1, Assembler::C_DFLT); + } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) { + jobject o = c->as_jobject(); + if (o == NULL) { + __ cmp(reg1, (int32_t)NULL_WORD); + } else { + __ movoop(rscratch1, o); + __ cmpptr(reg1, rscratch1); + } + } else { + fatal(err_msg("unexpected type: %s", basictype_to_str(c->type()))); + } + } else if (opr2->is_address()) { + __ ldr(rscratch2, as_Address(opr2->as_address_ptr(), rscratch1, Address::IDT_INT)); + __ cmp(reg1, rscratch2); + } else { + ShouldNotReachHere(); + } + + } else if (opr1->is_double_cpu()) { + assert(opr1->type() == T_LONG, "expect jlong type"); + assert(opr2->type() == T_LONG, "expect jlong type"); + Register xlo = opr1->as_register_lo(); + Register xhi = opr1->as_register_hi(); + if (opr2->is_double_cpu()) { + // cpu register - cpu register + Register ylo = opr2->as_register_lo(); + Register yhi = opr2->as_register_hi(); + switch (condition) 
{ + case lir_cond_equal: + case lir_cond_notEqual: + case lir_cond_belowEqual: + case lir_cond_aboveEqual: + // these need APSR.ZC. the ops below set them correctly (but not APSR.V) + __ cmp(xhi, yhi); + __ cmp(xlo, ylo, Assembler::EQ); + break; + case lir_cond_less: + case lir_cond_greaterEqual: + __ cmp(xlo, ylo); + __ sbcs(rscratch1, xhi, yhi); + break; + case lir_cond_lessEqual: + case lir_cond_greater: + // here goes a trick: the below operations do not produce the valid + // value for the APSR.Z flag and there is no easy way to set it. so + // we exchange the order of arguments in the comparison and use the + // opposite condition in the conditional statement that follows. + // GE should be used instead of LE and LT in place of GT. + // the comp_op() could only be followed by: emit_opBranch(), cmove() and + // emit_assert(). these are patched to be aware of this trick + __ cmp(ylo, xlo); + __ sbcs(rscratch1, yhi, xhi); + break; + } + } else if (opr2->is_constant()) { + jlong y = opr2->as_jlong(); + assert(Assembler::operand_valid_for_add_sub_immediate(y), "immediate overflow"); + switch (condition) { + case lir_cond_equal: + case lir_cond_notEqual: + case lir_cond_belowEqual: + case lir_cond_aboveEqual: + __ cmp(xhi, (int)(y >> 32)); + __ cmp(xlo, (int)y, Assembler::EQ); + break; + case lir_cond_less: + case lir_cond_greaterEqual: + __ cmp(xlo, (int)y); + __ sbcs(rscratch1, xhi, (int)(y >> 32)); + break; + case lir_cond_lessEqual: + case lir_cond_greater: + __ rsbs(rscratch1, xlo, (int)y); + __ rscs(rscratch1, xhi, (int)(y >> 32)); + break; + } + } else { + ShouldNotReachHere(); + } + } else if (opr1->is_single_fpu()) { + FloatRegister reg1 = opr1->as_float_reg(); + assert(opr2->is_single_fpu(), "expect single float register"); + FloatRegister reg2 = opr2->as_float_reg(); + __ vcmp_f32(reg1, reg2); + __ get_fpsr(); + } else if (opr1->is_double_fpu()) { + FloatRegister reg1 = opr1->as_double_reg(); + assert(opr2->is_double_fpu(), "expect double float register"); + FloatRegister reg2 = opr2->as_double_reg(); + __ vcmp_f64(reg1, reg2); + __ get_fpsr(); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + bool is_unordered_less = (code == lir_ucmp_fd2i); + if (left->is_single_fpu()) { + __ float_cmp(true, is_unordered_less ? -1 : 1, left->as_float_reg(), right->as_float_reg(), dst->as_register()); + } else if (left->is_double_fpu()) { + __ float_cmp(false, is_unordered_less ? 
-1 : 1, left->as_double_reg(), right->as_double_reg(), dst->as_register());
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (code == lir_cmp_l2i) {
+    // set dst to -1, 0 or 1 according to the signed 64-bit comparison of left and right;
+    // LT is evaluated on the subs/sbcs flags before orrs clobbers them, and the orrs
+    // result is zero iff both halves of the difference are zero (i.e. the operands are equal)
+    __ mov(dst->as_register(), 1);
+    __ subs(rscratch1, left->as_register_lo(), right->as_register_lo());
+    __ sbcs(rscratch2, left->as_register_hi(), right->as_register_hi());
+    __ mov(dst->as_register(), -1, Assembler::LT);
+    __ orrs(rscratch1, rscratch1, rscratch2);
+    __ mov(dst->as_register(), 0, Assembler::EQ);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::align_call(LIR_Code code) { }
+
+
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
+  __ trampoline_call(Address(op->addr(), rtype));
+  add_call_info(code_offset(), op->info());
+}
+
+
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
+  __ ic_call(op->addr());
+  add_call_info(code_offset(), op->info());
+}
+
+
+/* Currently, vtable-dispatch is only enabled for sparc platforms */
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
+  ShouldNotReachHere();
+}
+
+
+void LIR_Assembler::emit_static_call_stub() {
+  address call_pc = __ pc();
+  address stub = __ start_a_stub(call_stub_size);
+  if (stub == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  int start = __ offset();
+
+  __ relocate(static_stub_Relocation::spec(call_pc));
+  __ mov_metadata(rmethod, (Metadata*)NULL);
+  __ movptr(rscratch1, 0);
+  __ b(rscratch1);
+
+  assert(__ offset() - start <= call_stub_size, "stub too big");
+  __ end_a_stub();
+}
+
+
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  assert(exceptionOop->as_register() == r0, "must match");
+  assert(exceptionPC->as_register() == r3, "must match");
+
+  // exception object is not added to oop map by LinearScan
+  // (LinearScan assumes that no oops are in fixed registers)
+  info->add_register_oop(exceptionOop);
+  Runtime1::StubID unwind_id;
+
+  // get current pc information
+  // pc is only needed if the method has an exception handler; the unwind code does not need it.
+  int pc_for_athrow_offset = __ offset();
+  __ add(exceptionPC->as_register(), r15_pc, -8);
+  add_call_info(pc_for_athrow_offset, info); // for exception handler
+
+  __ verify_not_null_oop(r0);
+  // search for an exception handler (r0: exception oop, r3: throwing pc)
+  if (compilation()->has_fpu_code()) {
+    unwind_id = Runtime1::handle_exception_id;
+  } else {
+    unwind_id = Runtime1::handle_exception_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
+
+  // FIXME: enough room for two byte trap ????
+  __ nop();
+}
+
+
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  assert(exceptionOop->as_register() == r0, "must match");
+
+  __ b(_unwind_handler_entry);
+}
+
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  Register dreg = dest->is_single_cpu() ?
dest->as_register() : dest->as_register_lo(); + + switch (left->type()) { + case T_INT: + case T_ADDRESS: + case T_OBJECT: + __ andr(rscratch1, count->as_register(), 0x1f); + switch (code) { + case lir_shl: __ lsl(dreg, lreg, rscratch1); break; + case lir_shr: __ asr(dreg, lreg, rscratch1); break; + case lir_ushr: __ lsr(dreg, lreg, rscratch1); break; + default: + ShouldNotReachHere(); + break; + } + break; + case T_LONG: + { + Register lreg_hi = left->as_register_hi(); + Register dreg_hi = dest->as_register_hi(); + const int word_bits = 8 * wordSize; + + if (code == lir_shl || code == lir_ushr) { + check_register_collision(dreg, &lreg, &lreg_hi, rscratch1); + check_register_collision(dreg_hi, &lreg, &lreg_hi, rscratch2); + } + + switch (code) { + case lir_shl: + __ andr(dreg, count->as_register(), 0x3f); + __ sub(dreg_hi, dreg, word_bits); + __ lsl(lreg_hi, lreg_hi, dreg); + __ orr(lreg_hi, lreg_hi, lreg, lsl(dreg_hi)); + __ rsb(dreg_hi, dreg, word_bits); + __ orr(dreg_hi, lreg_hi, lreg, lsr(dreg_hi)); + __ lsl(dreg, lreg, dreg); + break; + case lir_shr: { + __ mov(rscratch2, lreg_hi); + __ andr(rscratch1, count->as_register(), 0x3f); + __ lsr(dreg, lreg, rscratch1); + __ rsb(dreg_hi, rscratch1, word_bits); + __ orr(dreg, dreg, rscratch2, lsl(dreg_hi)); + __ asr(dreg_hi, rscratch2, rscratch1); + __ subs(rscratch1, rscratch1, word_bits); + __ mov(dreg, rscratch2, asr(rscratch1), Assembler::GT); + } + break; + case lir_ushr: + __ andr(dreg, count->as_register(), 0x3f); + __ lsr(lreg, lreg, dreg); + __ rsb(dreg_hi, dreg, word_bits); + __ orr(lreg, lreg, lreg_hi, lsl(dreg_hi)); + __ lsr(dreg_hi, lreg_hi, dreg); + __ sub(dreg, dreg, word_bits); + __ orr(dreg, lreg, lreg_hi, lsr(dreg)); + break; + default: + ShouldNotReachHere(); + break; + } + } + break; + default: + ShouldNotReachHere(); + break; + } +} + + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); + Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + + if (!count) { + reg2reg(left, dest); + return; + } + + switch (left->type()) { + case T_INT: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ lsl(dreg, lreg, count); break; + case lir_shr: __ asr(dreg, lreg, count); break; + case lir_ushr: __ lsr(dreg, lreg, count); break; + default: + ShouldNotReachHere(); + break; + } + break; + case T_LONG: { + Register lreg_hi = left->as_register_hi(); + Register dreg_hi = dest->as_register_hi(); + const int word_bits = 8 * wordSize; + + switch (code) { + case lir_shl: + if (count >= word_bits) { + __ lsl(dreg_hi, lreg, count - word_bits); + __ mov(dreg, 0); + } else { + check_register_collision(dreg_hi, &lreg); + __ lsl(dreg_hi, lreg_hi, count); + __ orr(dreg_hi, dreg_hi, lreg, lsr(word_bits - count)); + __ lsl(dreg, lreg, count); + } + break; + case lir_shr: + if (count >= word_bits) { + __ asr(dreg, lreg_hi, count - word_bits); + __ asr(dreg_hi, lreg_hi, word_bits); + } else { + check_register_collision(dreg, &lreg_hi); + __ lsr(dreg, lreg, count); + __ orr(dreg, dreg, lreg_hi, lsl(word_bits - count)); + __ asr(dreg_hi, lreg_hi, count); + } + break; + case lir_ushr: + if (count >= word_bits) { + __ lsr(dreg, lreg_hi, count - word_bits); + __ mov(dreg_hi, 0); + } else { + check_register_collision(dreg, &lreg_hi); + __ lsr(dreg, lreg, count); + __ orr(dreg, dreg, lreg_hi, lsl(word_bits - count)); + __ lsr(dreg_hi, lreg_hi, count); + } + break; + default: + ShouldNotReachHere(); + break; + } + } + break; + default: + ShouldNotReachHere(); + break; + } +} + + +void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ str (r, Address(sp, offset_from_sp_in_bytes)); +} + + +void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ mov (rscratch1, c); + __ str (rscratch1, Address(sp, offset_from_sp_in_bytes)); +} + +// This code replaces a call to arraycopy; no exception may +// be thrown in this code, they must be thrown in the System.arraycopy +// activation frame; we could save some checks if this would not be the case +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + ciArrayKlass* default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + // due to limited number of registers available and in order to simplify + // the code we fix the registers used by the arguments to this intrinsic. 
+ // see the comment in LIRGenerator::do_ArrayCopy + assert(src == j_rarg0, "assumed by implementation"); + assert(src_pos == j_rarg1, "assumed by implementation"); + assert(dst == j_rarg2, "assumed by implementation"); + assert(dst_pos == j_rarg3, "assumed by implementation"); + assert(length == r4, "assumed by implementation"); + assert(tmp == r5, "assumed by implementation"); + + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; + if (basic_type == T_ARRAY) basic_type = T_OBJECT; + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL // || basic_type == T_OBJECT + ) { + Label done; + assert(src == r1 && src_pos == r2, "mismatch in calling convention"); + + // Save the arguments in case the generic arraycopy fails and we + // have to fall back to the JNI stub + __ str(dst, Address(sp, 0*BytesPerWord)); + __ str(dst_pos, Address(sp, 1*BytesPerWord)); + __ str(length, Address(sp, 2*BytesPerWord)); + __ str(src_pos, Address(sp, 3*BytesPerWord)); + __ str(src, Address(sp, 4*BytesPerWord)); + + address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy); + address copyfunc_addr = StubRoutines::generic_arraycopy(); + + // The arguments are in java calling convention so we shift them + // to C convention + assert(c_rarg0 == j_rarg3, "assumed in the code below"); + __ mov(rscratch1, c_rarg0); + assert_different_registers(c_rarg0, j_rarg1, j_rarg2); + __ mov(c_rarg0, j_rarg0); + assert_different_registers(c_rarg1, j_rarg2, j_rarg3); + __ mov(c_rarg1, j_rarg1); + assert_different_registers(c_rarg2, j_rarg3); + __ mov(c_rarg2, j_rarg2); + __ mov(c_rarg3, rscratch1); + __ str(length, Address(sp)); // the below C function follows C calling convention, + // so should put 5th arg to stack + + if (copyfunc_addr == NULL) { // Use C version if stub was not generated + __ mov(rscratch1, RuntimeAddress(C_entry)); + __ bl(rscratch1); + } else { +#ifndef PRODUCT + if (PrintC1Statistics) { + __ increment(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); + } + + __ cbz(r0, *stub->continuation()); + + // Reload values from the stack so they are where the stub + // expects them. 
+ __ ldr(dst, Address(sp, 0*BytesPerWord)); + __ ldr(dst_pos, Address(sp, 1*BytesPerWord)); + __ ldr(length, Address(sp, 2*BytesPerWord)); + __ ldr(src_pos, Address(sp, 3*BytesPerWord)); + __ ldr(src, Address(sp, 4*BytesPerWord)); + + if (copyfunc_addr != NULL) { + // r0 is -1^K where K == partial copied count + __ inv(rscratch1, r0); + // adjust length down and src/end pos up by partial copied count + __ sub(length, length, rscratch1); + __ add(src_pos, src_pos, rscratch1); + __ add(dst_pos, dst_pos, rscratch1); + } + __ b(*stub->entry()); + + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); + + int elem_size = type2aelembytes(basic_type); + int scale = exact_log2(elem_size); + + Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); + Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); + Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); + Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); + + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ cbz(src, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ cbz(dst, *stub->entry()); + } + + // check if negative + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ cmp(src_pos, 0); + __ b(*stub->entry(), Assembler::LT); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ cmp(dst_pos, 0); + __ b(*stub->entry(), Assembler::LT); + } + + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ cmp(length, 0); + __ b(*stub->entry(), Assembler::LT); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ add(tmp, src_pos, length); + __ ldr(rscratch1, src_length_addr); + __ cmp(tmp, rscratch1); + __ b(*stub->entry(), Assembler::HI); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ add(tmp, dst_pos, length); + __ ldr(rscratch1, dst_length_addr); + __ cmp(tmp, rscratch1); + __ b(*stub->entry(), Assembler::HI); + } + + // FIXME: The logic in LIRGenerator::arraycopy_helper clears + // length_positive_check if the source of our length operand is an + // arraylength. However, that arraylength might be zero, and the + // stub that we're about to call contains an assertion that count != + // 0 . So we make this check purely in order not to trigger an + // assertion failure. + __ cbz(length, *stub->continuation()); + + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + __ ldr(tmp, src_klass_addr); + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + __ b(*stub->entry(), Assembler::NE); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + + __ push(RegSet::of(src, dst), sp); + + __ load_klass(src, src); + __ load_klass(dst, dst); + + __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); + + __ push(src); // sub + __ push(dst); // super + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + // result on TOS + __ pop(src); // result + __ pop(dst); + + __ cbnz(src, cont); + + __ bind(slow); + __ pop(RegSet::of(src, dst), sp); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + // src is not a sub class of dst so we have to do a + // per-element check. 
+ + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least both of them object arrays. + assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + Address klass_lh_addr(tmp, lh_offset); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ ldr(rscratch1, klass_lh_addr); + __ mov(rscratch2, objArray_lh); + __ eor(rscratch1, rscratch1, rscratch2); + __ cbnz(rscratch1, *stub->entry()); + } + + // Spill because stubs can use any register they like and it's + // easier to restore just those that we care about. + __ str(dst, Address(sp, 0*BytesPerWord)); + __ str(dst_pos, Address(sp, 1*BytesPerWord)); + __ str(length, Address(sp, 2*BytesPerWord)); + __ str(src_pos, Address(sp, 3*BytesPerWord)); + __ str(src, Address(sp, 4*BytesPerWord)); + + assert(dst_pos == r0, "assumed in the code below"); + __ mov(rscratch1, dst_pos); // save dst_pos which is r0 + __ lea(c_rarg0, Address(src, src_pos, lsl(scale))); + __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg0, dst, length); + __ lea(c_rarg1, Address(dst, rscratch1, lsl(scale))); + __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg1, dst, length); + + __ load_klass(c_rarg2, dst); + __ ldr(c_rarg2, Address(c_rarg2, ObjArrayKlass::element_klass_offset())); + __ ldr(c_rarg3, Address(c_rarg2, Klass::super_check_offset_offset())); + __ far_call(RuntimeAddress(copyfunc_addr)); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ cbnz(r0, failed); + __ increment(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); + __ bind(failed); + } +#endif + + __ cbz(r0, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ increment(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, rscratch1); + __ mov(rscratch1, r0); + + // Restore previously spilled arguments + __ ldr(dst, Address(sp, 0*BytesPerWord)); + __ ldr(dst_pos, Address(sp, 1*BytesPerWord)); + __ ldr(length, Address(sp, 2*BytesPerWord)); + __ ldr(src_pos, Address(sp, 3*BytesPerWord)); + __ ldr(src, Address(sp, 4*BytesPerWord)); + + // return value is -1^K where K is partial copied count + __ mvn(rscratch1, rscratch1); + // adjust length down and src/end pos up by partial copied count + __ sub(length, length, rscratch1); + __ add(src_pos, src_pos, rscratch1); + __ add(dst_pos, dst_pos, rscratch1); + } + + __ b(*stub->entry()); + + __ bind(cont); + __ pop(RegSet::of(src, dst), sp); + } + } + +#ifdef ASSERT + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. 
+ Label known_ok, halt; + __ mov_metadata(tmp, default_type->constant_encoding()); + + if (basic_type != T_OBJECT) { + + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + __ b(halt, Assembler::NE); + __ ldr(rscratch1, src_klass_addr); + __ cmp(tmp, rscratch1); + __ b(known_ok, Assembler::EQ); + } else { + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + __ b(known_ok, Assembler::EQ); + __ cmp(src, dst); + __ b(known_ok, Assembler::EQ); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +#endif + + assert(dst_pos == r0, "assumed in the code below"); + __ mov(rscratch1, dst_pos); // save r0 + __ lea(c_rarg0, Address(src, src_pos, lsl(scale))); + __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg0, dst, rscratch1, length); + __ lea(c_rarg1, Address(dst, rscratch1, lsl(scale))); + __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg1, dst, length); + __ mov(c_rarg2, length); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb) { + __ far_call(RuntimeAddress(entry)); + } else { + __ call_VM_leaf(entry, 3); + } + + __ bind(*stub->continuation()); +} + +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + if (!UseFastLocking) { + __ b(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + Register scratch = noreg; + if (UseBiasedLocking) { + scratch = op->scratch_opr()->as_register(); + } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible + int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } + // done + } else if (op->code() == lir_unlock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + Unimplemented(); + } + __ bind(*op->stub()->continuation()); +} + + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + Bytecodes::Code bc = method->java_code_at_bci(bci); + const bool callee_is_static = callee->is_loaded() && callee->is_static(); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) && + 
!callee_is_static && // required for optimized MH invokes + C1ProfileVirtualCalls) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + return; + } + } + + // Receiver type not found in profile data; select an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == NULL) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); + __ mov_metadata(rscratch1, known_klass->constant_encoding()); + __ lea(rscratch2, recv_addr); + __ str(rscratch1, Address(rscratch2)); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + return; + } + } + } else { + __ load_klass(recv, recv); + Label update_done; + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
+ __ addptr(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call + __ addptr(counter_addr, DataLayout::counter_increment); + } +} + + +void LIR_Assembler::emit_delay(LIR_OpDelay*) { + Unimplemented(); +} + + +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { + __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); +} + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, res); + __ lea(res, ExternalAddress(StubRoutines::crc_table_addr())); + + __ inv(crc, crc); + __ update_byte_crc32(crc, val, res); + __ inv(res, crc); +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + COMMENT("emit_profile_type {"); + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + Address mdo_addr = as_Address(op->mdp()->as_address_ptr(), noreg, Address::IDT_INT); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + assert(mdo_addr.base() != rscratch1, "wrong register"); + + __ verify_oop(obj); + + if (tmp != obj) { + __ mov(tmp, obj); + } + if (do_null) { + __ cbnz(tmp, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ldr(rscratch2, mdo_addr); + __ orr(rscratch2, rscratch2, TypeEntries::null_seen); + __ str(rscratch2, mdo_addr); + } + if (do_update) { +#ifndef ASSERT + __ b(next); + } +#else + __ b(next); + } + } else { + __ cbnz(tmp, update); + __ stop("unexpected null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp, tmp); + __ mov_metadata(rscratch1, exact_klass->constant_encoding()); + __ eor(rscratch1, tmp, rscratch1); + __ cbz(rscratch1, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + } else { + __ load_klass(tmp, tmp); + } + + __ ldr(rscratch2, mdo_addr); + __ eor(tmp, tmp, rscratch2); + __ andr(rscratch1, tmp, TypeEntries::type_klass_mask); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ cbz(rscratch1, next); + + __ andr(rscratch1, tmp, TypeEntries::type_unknown); + __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore. 
+ + if (TypeEntries::is_type_none(current_klass)) { + __ cbz(rscratch2, none); + __ cmp(rscratch2, TypeEntries::null_seen); + __ b(none, Assembler::EQ); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + __ dmb(Assembler::ISH); + __ ldr(rscratch2, mdo_addr); + __ eor(tmp, tmp, rscratch2); + __ andr(rscratch1, tmp, TypeEntries::type_klass_mask); + __ cbz(rscratch1, next); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ ldr(tmp, mdo_addr); + __ andr(rscratch1, tmp, TypeEntries::type_unknown); + __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore. + } + + // different than before. Cannot keep accurate profile. + __ ldr(rscratch2, mdo_addr); + __ orr(rscratch2, rscratch2, TypeEntries::type_unknown); + __ str(rscratch2, mdo_addr); + + if (TypeEntries::is_type_none(current_klass)) { + __ b(next); + + __ bind(none); + // first time here. Set profile type. + __ str(tmp, mdo_addr); + } + } else { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + __ ldr(rscratch2, mdo_addr); + __ eor(tmp, tmp, rscratch2); + __ andr(rscratch1, tmp, TypeEntries::type_klass_mask); + __ cbz(rscratch1, next); +#ifdef ASSERT + { + Label ok; + __ ldr(rscratch1, mdo_addr); + __ cbz(rscratch1, ok); + __ cmp(rscratch1, TypeEntries::null_seen); + __ b(ok, Assembler::EQ); + // may have been set by another thread + __ dmb(Assembler::ISH); + __ mov_metadata(rscratch1, exact_klass->constant_encoding()); + __ ldr(rscratch2, mdo_addr); + __ eor(rscratch2, rscratch1, rscratch2); + __ andr(rscratch2, rscratch2, TypeEntries::type_mask); + __ cbz(rscratch2, ok); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } +#endif + // first time here. Set profile type. + __ ldr(tmp, mdo_addr); + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + __ ldr(tmp, mdo_addr); + __ andr(rscratch1, tmp, TypeEntries::type_unknown); + __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore. + + __ orr(tmp, tmp, TypeEntries::type_unknown); + __ str(tmp, mdo_addr); + // FIXME: Write barrier needed here? 
+ } + } + + __ bind(next); + } + COMMENT("} emit_profile_type"); +} + + +void LIR_Assembler::align_backward_branch_target() { +} + + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { + if (left->is_single_cpu()) { + assert(dest->is_single_cpu(), "expect single result reg"); + __ neg(dest->as_register(), left->as_register()); + } else if (left->is_double_cpu()) { + assert(dest->is_double_cpu(), "expect double result reg"); + const Register l_lo = left->as_register_lo(); + Register l_hi = left->as_register_hi(); + check_register_collision(dest->as_register_lo(), &l_hi); + __ rsbs(dest->as_register_lo(), l_lo, 0); + __ rsc(dest->as_register_hi(), l_hi, 0); + } else if (left->is_single_fpu()) { + assert(dest->is_single_fpu(), "expect single float result reg"); + __ vneg_f32(dest->as_float_reg(), left->as_float_reg()); + } else { + assert(left->is_double_fpu(), "expect double float operand reg"); + assert(dest->is_double_fpu(), "expect double float result reg"); + __ vneg_f64(dest->as_double_reg(), left->as_double_reg()); + } +} + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) { + __ lea(dest->as_register(), as_Address(addr->as_address_ptr(), noreg, Address::IDT_LEA)); +} + + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + CodeBlob *cb = CodeCache::find_blob(dest); + if (cb) { + __ far_call(RuntimeAddress(dest)); + } else { + __ lea(rscratch1, RuntimeAddress(dest)); + __ bl(rscratch1); + } + if (info != NULL) { + add_call_info_here(info); + } + __ maybe_isb(); +} + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (type == T_LONG || type == T_DOUBLE) { + const LIR_Opr long_val = FrameMap::long0_opr; + + int null_check_offset = -1; + + if (src->is_register() && dest->is_address()) { + // long1 reserved as temp by LinearScan::pd_add_temps + const LIR_Opr long_tmp = FrameMap::long1_opr; + __ lea(rscratch1, as_Address_lo(dest->as_address_ptr(), Address::IDT_LEA)); + + if (type == T_DOUBLE) { + // long0 reserved as temp by LinearScan::pd_add_temps + __ vmov_f64(long_val->as_register_lo(), long_val->as_register_hi(), src->as_double_reg()); + } else { + assert(type == T_LONG && src->is_same_register(long_val), "T_LONG src should be in long0 (by LIRGenerator)"); + } + + null_check_offset = __ offset(); + __ atomic_strd(long_val->as_register_lo(), long_val->as_register_hi(), rscratch1, + long_tmp->as_register_lo(), long_tmp->as_register_hi()); + + } else if (src->is_address() && dest->is_register()) { + __ lea(rscratch1, as_Address_lo(src->as_address_ptr(), Address::IDT_LEA)); + + null_check_offset = __ offset(); + __ atomic_ldrd(long_val->as_register_lo(), long_val->as_register_hi(), rscratch1); + + if (type == T_DOUBLE) { + __ vmov_f64(dest->as_double_reg(), long_val->as_register_lo(), long_val->as_register_hi()); + } else { + assert(type != T_LONG || dest->is_same_register(long_val), "T_LONG dest should be in long0 (by LIRGenerator)"); + } + } else { + Unimplemented(); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_offset, info); + } + + } else { + move_op(src, dest, type, lir_patch_none, info, + /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); + } +} + +#ifdef ASSERT +// emit run-time assertion +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + assert(op->code() == lir_assert, "must be"); + + if (op->in_opr1()->is_valid()) { + assert(op->in_opr2()->is_valid(), 
"both operands must be valid"); + comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op); + } else { + assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); + assert(op->condition() == lir_cond_always, "no other conditions allowed"); + } + + Label ok; + if (op->condition() != lir_cond_always) { + Assembler::Condition acond = Assembler::AL; + switch (op->condition()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = Assembler::LT; break; + case lir_cond_greaterEqual: acond = Assembler::GE; break; + case lir_cond_lessEqual: acond = Assembler::LE; break; + case lir_cond_greater: acond = Assembler::GT; break; + case lir_cond_belowEqual: acond = Assembler::LS; break; + case lir_cond_aboveEqual: acond = Assembler::HS; break; + default: ShouldNotReachHere(); + } + if (op->in_opr1()->type() == T_LONG) { + // a special trick here to be able to effectively compare jlongs + // for the lessEqual and greater conditions the jlong operands are swapped + // during comparison and hence should use mirror condition in conditional + // instruction + // see LIR_Assembler::comp_op and LIR_Assembler::cmove + switch (op->condition()) { + case lir_cond_lessEqual: acond = Assembler::GE; break; + case lir_cond_greater: acond = Assembler::LT; break; + } + } + __ b(ok, acond); + } + if (op->halt()) { + const char* str = __ code_string(op->msg()); + __ stop(str); + } else { + breakpoint(); + } + __ bind(ok); +} +#endif + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +void LIR_Assembler::membar() { + COMMENT("membar"); + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::membar_acquire() { + __ membar(Assembler::LoadLoad|Assembler::LoadStore); +} + +void LIR_Assembler::membar_release() { + __ membar(Assembler::LoadStore|Assembler::StoreStore); +} + +void LIR_Assembler::membar_loadload() { + __ membar(Assembler::LoadLoad); +} + +void LIR_Assembler::membar_storestore() { + __ membar(MacroAssembler::StoreStore); +} + +void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); } + +void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); } + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + __ mov(result_reg->as_register(), rthread); +} + + +void LIR_Assembler::peephole(LIR_List *lir) { +#if 0 + if (tableswitch_count >= max_tableswitches) + return; + + /* + This finite-state automaton recognizes sequences of compare-and- + branch instructions. We will turn them into a tableswitch. You + could argue that C1 really shouldn't be doing this sort of + optimization, but without it the code is really horrible. 
+ */ + + enum { start_s, cmp1_s, beq_s, cmp_s } state; + int first_key, last_key = -2147483648; + int next_key = 0; + int start_insn = -1; + int last_insn = -1; + Register reg = noreg; + LIR_Opr reg_opr; + state = start_s; + + LIR_OpList* inst = lir->instructions_list(); + for (int i = 0; i < inst->length(); i++) { + LIR_Op* op = inst->at(i); + switch (state) { + case start_s: + first_key = -1; + start_insn = i; + switch (op->code()) { + case lir_cmp: + LIR_Opr opr1 = op->as_Op2()->in_opr1(); + LIR_Opr opr2 = op->as_Op2()->in_opr2(); + if (opr1->is_cpu_register() && opr1->is_single_cpu() + && opr2->is_constant() + && opr2->type() == T_INT) { + reg_opr = opr1; + reg = opr1->as_register(); + first_key = opr2->as_constant_ptr()->as_jint(); + next_key = first_key + 1; + state = cmp_s; + goto next_state; + } + break; + } + break; + case cmp_s: + switch (op->code()) { + case lir_branch: + if (op->as_OpBranch()->cond() == lir_cond_equal) { + state = beq_s; + last_insn = i; + goto next_state; + } + } + state = start_s; + break; + case beq_s: + switch (op->code()) { + case lir_cmp: { + LIR_Opr opr1 = op->as_Op2()->in_opr1(); + LIR_Opr opr2 = op->as_Op2()->in_opr2(); + if (opr1->is_cpu_register() && opr1->is_single_cpu() + && opr1->as_register() == reg + && opr2->is_constant() + && opr2->type() == T_INT + && opr2->as_constant_ptr()->as_jint() == next_key) { + last_key = next_key; + next_key++; + state = cmp_s; + goto next_state; + } + } + } + last_key = next_key; + state = start_s; + break; + default: + assert(false, "impossible state"); + } + if (state == start_s) { + if (first_key < last_key - 5L && reg != noreg) { + { + // printf("found run register %d starting at insn %d low value %d high value %d\n", + // reg->encoding(), + // start_insn, first_key, last_key); + // for (int i = 0; i < inst->length(); i++) { + // inst->at(i)->print(); + // tty->print("\n"); + // } + // tty->print("\n"); + } + + struct tableswitch *sw = &switches[tableswitch_count]; + sw->_insn_index = start_insn, sw->_first_key = first_key, + sw->_last_key = last_key, sw->_reg = reg; + inst->insert_before(last_insn + 1, new LIR_OpLabel(&sw->_after)); + { + // Insert the new table of branches + int offset = last_insn; + for (int n = first_key; n < last_key; n++) { + inst->insert_before + (last_insn + 1, + new LIR_OpBranch(lir_cond_always, T_ILLEGAL, + inst->at(offset)->as_OpBranch()->label())); + offset -= 2, i++; + } + } + // Delete all the old compare-and-branch instructions + for (int n = first_key; n < last_key; n++) { + inst->remove_at(start_insn); + inst->remove_at(start_insn); + } + // Insert the tableswitch instruction + inst->insert_before(start_insn, + new LIR_Op2(lir_cmp, lir_cond_always, + LIR_OprFact::intConst(tableswitch_count), + reg_opr)); + inst->insert_before(start_insn + 1, new LIR_OpLabel(&sw->_branches)); + tableswitch_count++; + } + reg = noreg; + last_key = -2147483648; + } + next_state: + ; + } +#endif +} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + BasicType type = src->type(); + Address addr = as_Address(src->as_address_ptr(), Address::toInsnDataType(type)); + + bool is_long = false; + + switch(type) { + case T_INT: + case T_OBJECT: + case T_ARRAY: + break; + case T_LONG: + is_long = true; + break; + default: + ShouldNotReachHere(); + } + + switch (code) { + case lir_xadd: + { + Register tmp = tmp_op->as_register(); + Register dst = as_reg(dest); + Label again; + __ lea(tmp, addr); + __ bind(again); + if(is_long) { + 
assert(dest->as_register_lo()->successor() == dest->as_register_hi(), "must be contiguous"); + assert((dest->as_register_lo()->encoding() & 1) == 0, "must be even"); + _masm->ldrexd(dst, tmp); + } else { + _masm->ldrex(dst, tmp); + } + arith_op(lir_add, dest, data, dest, NULL, false); + if (is_long) { + _masm->strexd(rscratch1, dst, tmp); + } else { + _masm->strex(rscratch1, dst, tmp); + } + __ cbnz(rscratch1, again); + arith_op(lir_sub, dest, data, dest, NULL, false); + break; + } + case lir_xchg: + { + Register tmp = tmp_op->as_register(); + Register obj = as_reg(data); + Register dst = as_reg(dest); + assert_different_registers(obj, addr.base(), tmp, rscratch1, dst); + Label again; + __ lea(tmp, addr); + __ bind(again); + if(is_long) { + assert(dest->as_register_lo()->successor() == dest->as_register_hi(), "must be contiguous"); + assert((dest->as_register_lo()->encoding() & 1) == 0, "must be even"); + + assert(data->is_double_cpu(), "should be double register"); + assert(data->as_register_lo()->successor() == data->as_register_hi(), "must be contiguous"); + assert((data->as_register_lo()->encoding() & 1) == 0, "must be even"); + + _masm->ldrexd(dst, tmp); + _masm->strexd(rscratch1, obj, tmp); + } else { + _masm->ldrex(dst, tmp); + _masm->strex(rscratch1, obj, tmp); + } + __ cbnz(rscratch1, again); + } + break; + default: + ShouldNotReachHere(); + } + __ membar(__ AnyAny); +} + +void LIR_Assembler::check_register_collision(Register d, Register *s1, Register *s2, Register tmp) { + // use a temp if any of the registers used as a source of operation + // collide with result register of the prerequisite operation + if (d == *s1) { + __ mov(tmp, d); + *s1 = tmp; + } else if (s2 && d == *s2) { + __ mov(tmp, d); + *s2 = tmp; + } +} + +#undef __ --- /dev/null 2016-08-26 13:07:53.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_LIRAssembler_aarch32.hpp 2016-08-26 13:07:53.000000000 +0300 @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. 
+
+#ifndef CPU_AARCH32_VM_C1_LIRASSEMBLER_AARCH32_HPP
+#define CPU_AARCH32_VM_C1_LIRASSEMBLER_AARCH32_HPP
+
+#include "assembler_aarch32.hpp"
+
+
+ private:
+
+  int array_element_size(BasicType type) const;
+
+  // Helper functions which check for overflow and set bailout if it
+  // occurs. They always return a valid embeddable pointer, but in the
+  // bailout case the pointer won't be to unique storage.
+  address float_constant(float f);
+  address double_constant(double d);
+
+  address int_constant(jlong n);
+
+  Address as_Address(LIR_Address* addr, Register tmp, Address::InsnDataType type);
+  Address as_Address_hi(LIR_Address* addr, Address::InsnDataType type);
+  Address as_Address_lo(LIR_Address* addr, Address::InsnDataType type);
+
+  Address as_Address(LIR_Address* addr, Address::InsnDataType type) {
+    return as_Address(addr, rscratch1, type);
+  }
+
+
+  // Record the type of the receiver in ReceiverTypeData
+  void type_profile_helper(Register mdo,
+                           ciMethodData *md, ciProfileData *data,
+                           Register recv, Label* update_done);
+  void add_debug_info_for_branch(address adr, CodeEmitInfo* info);
+
+  void casw(Register addr, Register newval, Register cmpval, Register result);
+  void casl(Register addr, Register newval_lo, Register newval_hi,
+            Register cmpval_lo, Register cmpval_hi,
+            Register tmp_lo, Register tmp_hi, Register result);
+
+  FloatRegister as_float_reg(LIR_Opr doubleReg);
+
+  static const int max_tableswitches = 20;
+  struct tableswitch switches[max_tableswitches];
+  int tableswitch_count;
+
+  void init() { tableswitch_count = 0; }
+
+  void deoptimize_trap(CodeEmitInfo *info);
+
+  // Remap an input register (*s1 or *s2) to a temp one if it is also used
+  // as the result register (d) of a preceding operation (otherwise its
+  // contents would effectively be corrupted)
+  void check_register_collision(Register d, Register *s1, Register *s2 = NULL, Register tmp = rscratch1);
+
+public:
+
+  void store_parameter(Register r, int offset_from_sp_in_words);
+  void store_parameter(jint c, int offset_from_sp_in_words);
+  void store_parameter(jobject c, int offset_from_sp_in_words);
+
+enum { call_stub_size = 12 * NativeInstruction::arm_insn_sz,
+       exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
+       deopt_handler_size = 7 * NativeInstruction::arm_insn_sz };
+
+#endif // CPU_AARCH32_VM_C1_LIRASSEMBLER_AARCH32_HPP
--- /dev/null 2016-08-26 13:07:55.000000000 +0300
+++ new/src/cpu/aarch32/vm/c1_LIRGenerator_aarch32.cpp 2016-08-26 13:07:55.000000000 +0300
@@ -0,0 +1,1452 @@
+/*
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#include "precompiled.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_aarch32.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +// Item will be loaded into a byte register; Intel only +void LIRItem::load_byte_item() { + load_item(); +} + + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (r->is_constant()) { + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r0_oop_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::r3_opr; } +LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r0_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::r0_opr; break; + case objectTag: opr = FrameMap::r0_oop_opr; break; + case longTag: opr = FrameMap::long0_opr; break; + case floatTag: opr = FrameMap::fpu0_float_opr; break; + case doubleTag: opr = FrameMap::fpu0_double_opr; break; + + case addressTag: + default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; + } + + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); + return opr; +} + + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + LIR_Opr reg = new_register(T_INT); + set_vreg_flag(reg, LIRGenerator::byte_reg); + return reg; +} + + +//--------- loading items into registers -------------------------------- + + +bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { + if (v->type()->as_IntConstant() != NULL) { + return v->type()->as_IntConstant()->value() == 0L; + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + +bool LIRGenerator::can_inline_as_constant(Value v) const { + if (v->type()->as_IntConstant() != 
NULL) { + return Assembler::operand_valid_for_add_sub_immediate(v->type()->as_IntConstant()->value()); + } else if (v->type()->as_LongConstant() != NULL) { + return Assembler::operand_valid_for_add_sub_immediate(v->type()->as_LongConstant()->value()); + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + + +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + switch (c->type()) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + return Assembler::operand_valid_for_add_sub_immediate(c->as_jint()); + case T_LONG: + return Assembler::operand_valid_for_add_sub_immediate(c->as_jlong()); + + case T_OBJECT: + return c->as_jobject() == (jobject) NULL; + case T_METADATA: + return c->as_metadata() == (Metadata*) NULL; + + case T_FLOAT: + return Assembler::operand_valid_for_float_immediate(c->as_jfloat()); + case T_DOUBLE: + return Assembler::operand_valid_for_float_immediate(c->as_jdouble()); + } + return false; +} + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} + +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + const Address::InsnDataType insn_type = Address::toInsnDataType(type); + assert(base->is_register(), "must be"); + + // accumulate fixed displacements + if (index->is_constant()) { + disp += index->as_constant_ptr()->as_jint() << shift; + index = LIR_OprFact::illegalOpr; + shift = 0; + } + + // aarch32 cannot handle natively both index and offset at the same time + // need to calculate effective value + if (index->is_register()) { + if ((disp != 0) && + Address::shift_ok_for_index(lsl(shift), insn_type) && + Assembler::operand_valid_for_add_sub_immediate(disp)) { + // add tmp, base, disp + // ldr r, [tmp, index, LSL #shift ] + LIR_Opr tmp = new_pointer_register(); + __ add(base, LIR_OprFact::intptrConst(disp), tmp); + base = tmp; + disp = 0; + } else { + assert(shift <= (int) LIR_Address::times_8, "no large shift could be here"); + // add tmp, base, index, LSL #shift + // ... + // ldr r, [tmp, ...] + LIR_Opr tmp = new_pointer_register(); + __ leal(LIR_OprFact::address(new LIR_Address(base, index, (LIR_Address::Scale) shift, 0, type)), tmp); + base = tmp; + index = LIR_OprFact::illegalOpr; + shift = 0; + } + } + + assert(!index->is_register() || (disp == 0), "should be"); + + if (!Address::offset_ok_for_immed(disp, insn_type)) { + assert(!index->is_valid(), "should be"); + // here index should be illegal so we can replace it with the displacement + // loaded into a register + // mov tmp, disp + // ldr r, [base, tmp] + index = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(disp), index); + disp = 0; + } + + assert(Address::offset_ok_for_immed(disp, Address::toInsnDataType(type)), "must be"); + return new LIR_Address(base, index, (LIR_Address::Scale) shift, disp, type); +} + + +LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, + BasicType type, bool needs_card_mark) { + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); + + LIR_Address* addr = generate_address(array_opr, index_opr, shift, offset_in_bytes, type); + + if (needs_card_mark) { + // This store will need a precise card mark, so go ahead and + // compute the full adddres instead of computing once for the + // store and again for the card mark. 
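+    // (the leal below folds base + index*scale + disp into a single register, so the
+    //  same flat address can feed both the store and its precise post barrier)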
+ LIR_Opr tmp = new_pointer_register(); + __ leal(LIR_OprFact::address(addr), tmp); + return new LIR_Address(tmp, type); + } else { + return addr; + } +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + LIR_Opr r; + if (type == T_LONG) { + r = LIR_OprFact::longConst(x); + if (!Assembler::operand_valid_for_logical_immediate(false, x)) { + LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + } else if (type == T_INT) { + r = LIR_OprFact::intConst(x); + if (!Assembler::operand_valid_for_logical_immediate(true, x)) { + // This is all rather nasty. We don't know whether our constant + // is required for a logical or an arithmetic operation, wo we + // don't know what the range of valid values is!! + LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + } else { + ShouldNotReachHere(); + } + return r; +} + + + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { + LIR_Opr pointer = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(counter), pointer); + LIR_Address* addr = new LIR_Address(pointer, type); + increment_counter(addr, step); +} + + +void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + LIR_Opr imm = NULL; + switch(addr->type()) { + case T_INT: + imm = LIR_OprFact::intConst(step); + break; + case T_LONG: + imm = LIR_OprFact::longConst(step); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr reg = new_register(addr->type()); + __ load(addr, reg); + __ add(reg, imm, reg); + __ store(reg, addr); +} + +void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); + __ cmp(condition, reg, LIR_OprFact::intConst(c)); +} + +void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); + __ cmp(condition, reg, reg1); +} + + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + + if (is_power_of_2(c - 1)) { + __ shift_left(left, exact_log2(c - 1), tmp); + __ add(tmp, left, result); + return true; + } else if (is_power_of_2(c + 1)) { + __ shift_left(left, exact_log2(c + 1), tmp); + __ sub(tmp, left, result); + return true; + } else { + return false; + } +} + +void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType type = item->type(); + __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + + +void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { + assert(x->is_pinned(),""); + bool needs_range_check = x->compute_needs_range_check(); + bool use_length = x->length() != NULL; + bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT; + bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL || + !get_jobject_constant(x->value())->is_null_object() || + x->should_profile()); + + LIRItem array(x->array(), this); + LIRItem index(x->index(), this); + LIRItem value(x->value(), this); + LIRItem length(this); + + array.load_item(); + index.load_nonconstant(); + + if (use_length && needs_range_check) { + length.set_instruction(x->length()); + length.load_item(); + + } + if 
(needs_store_check || x->check_boolean()) { + value.load_item(); + } else { + value.load_for_store(x->elt_type()); + } + + set_no_result(x); + + // the CodeEmitInfo must be duplicated for each different + // LIR-instruction because spilling can occur anywhere between two + // instructions and so the debug information must be different + CodeEmitInfo* range_check_info = state_for(x); + CodeEmitInfo* null_check_info = NULL; + if (x->needs_null_check()) { + null_check_info = new CodeEmitInfo(range_check_info); + } + + // emit array address setup early so it schedules better + // FIXME? No harm in this on aarch64, and it might help + LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store); + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { + __ cmp(lir_cond_belowEqual, length.result(), index.result()); + __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result())); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check + null_check_info = NULL; + } + } + + if (GenerateArrayStoreCheck && needs_store_check) { + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = new_register(objectType); + + CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info); + __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci()); + } + + if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(value.result(), array_addr, null_check_info); + // Seems to be a precise + post_barrier(LIR_OprFact::address(array_addr), value.result()); + } else { + LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info); + __ move(result, array_addr, null_check_info); + } +} + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { + scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + LIR_Opr lock = new_register(T_INT); + LIR_Opr obj_temp = new_register(T_INT); + set_no_result(x); + monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); +} + + +void LIRGenerator::do_NegateOp(NegateOp* x) { + + LIRItem from(x->x(), this); + from.load_item(); + LIR_Opr result = rlock_result(x); + __ negate (from.result(), result); + +} + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + + if 
(x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { + // float remainder is implemented as a direct call into the runtime + LIRItem right(x->x(), this); + LIRItem left(x->y(), this); + + BasicTypeList signature(2); + if (x->op() == Bytecodes::_frem) { + signature.append(T_FLOAT); + signature.append(T_FLOAT); + } else { + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + } + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + const LIR_Opr result_reg = result_register_for(x->type()); + left.load_item_force(cc->at(1)); + right.load_item(); + + __ move(right.result(), cc->at(0)); + + address entry; + if (x->op() == Bytecodes::_frem) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + return; + } + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + + // Always load right hand side. + right.load_item(); + + if (!left.is_register()) + left.load_item(); + + LIR_Opr reg = rlock(x); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { + tmp = new_register(T_DOUBLE); + } + + arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), NULL); + + set_result(x, round_item(reg)); +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + + BasicTypeList signature(2); + signature.append(T_LONG); + signature.append(T_LONG); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + // check for division by zero (destroys registers of right operand!) + CodeEmitInfo* info = state_for(x); + + right.load_item(); + + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + + const LIR_Opr result_reg = result_register_for(x->type()); + left.load_item_force(cc->at(1)); + __ move(right.result(), cc->at(0)); + + address entry; + switch (x->op()) { + case Bytecodes::_lrem: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::lrem); + break; // check if dividend is 0 is done elsewhere + case Bytecodes::_ldiv: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv); + break; // check if dividend is 0 is done elsewhere + default: + ShouldNotReachHere(); + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + } else { + assert (x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, + "expect lmul, ladd or lsub"); + // add, sub, mul + left.load_item(); + if (! right.is_register()) { + if (x->op() == Bytecodes::_lmul + || ! right.is_constant() + || ! 
Assembler::operand_valid_for_add_sub_immediate(right.get_jlong_constant())) { + right.load_item(); + } else { // add, sub + assert (x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); + // don't load constants to save register + right.load_nonconstant(); + } + } + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + + // Test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + + left_arg->load_item(); + + // do not need to load right, as we can handle stack and constants + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + + right_arg->load_item(); + rlock_result(x); + + if (!(VM_Version::features() & FT_HW_DIVIDE)) { + // MacroAssembler::divide32 destroys both operand registers + left_arg->set_destroys_register(); + right_arg->set_destroys_register(); + } + + CodeEmitInfo* info = state_for(x); + LIR_Opr tmp = new_register(T_INT); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::intConst(0)); + __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + info = state_for(x); + + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL); + } + + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() + && Assembler::operand_valid_for_add_sub_immediate(right.get_jint_constant())) { + right.load_nonconstant(); + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); + } else { + assert (x->op() == Bytecodes::_imul, "expect imul"); + if (right.is_constant()) { + int c = right.get_jint_constant(); + if (! is_power_of_2(c) && ! is_power_of_2(c + 1) && ! is_power_of_2(c - 1)) { + // Cannot use constant op. 
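+        // (only multipliers that reduce to shifts and adds may stay as immediates,
+        //  e.g. x*8 -> x<<3, x*9 -> (x<<3)+x, x*7 -> (x<<3)-x; anything else needs
+        //  the constant in a register for a real mul)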
+ right.load_item(); + } else { + right.dont_load_item(); + } + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); + } +} + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + // when an operand with use count 1 is the left operand, then it is + // likely that no move for 2-operand-LIR-form is necessary + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + } + ShouldNotReachHere(); +} + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant()) { + right.dont_load_item(); + + switch (x->op()) { + case Bytecodes::_ishl: { + int c = right.get_jint_constant() & 0x1f; + __ shift_left(left.result(), c, x->operand()); + break; + } + case Bytecodes::_ishr: { + int c = right.get_jint_constant() & 0x1f; + __ shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_iushr: { + int c = right.get_jint_constant() & 0x1f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lshl: { + int c = right.get_jint_constant() & 0x3f; + __ shift_left(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lshr: { + int c = right.get_jint_constant() & 0x3f; + __ shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lushr: { + int c = right.get_jint_constant() & 0x3f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + } + default: + ShouldNotReachHere(); + } + } else { + right.load_item(); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + if (left.result()->type() == T_LONG) + left.set_destroys_register(); + switch (x->op()) { + case Bytecodes::_ishl: { + __ shift_left(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_ishr: { + __ shift_right(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_iushr: { + __ unsigned_shift_right(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_lshl: { + __ shift_left(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_lshr: { + __ shift_right(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_lushr: { + __ unsigned_shift_right(left.result(), right.result(), x->operand(), tmp); + break; + } + default: + ShouldNotReachHere(); + } + } +} + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant() + && ((right.type()->tag() == intTag + && Assembler::operand_valid_for_logical_immediate(true, right.get_jint_constant())) + || (right.type()->tag() == longTag + && Assembler::operand_valid_for_logical_immediate(false, right.get_jlong_constant())))) { + right.dont_load_item(); + } else { + right.load_item(); + } + switch (x->op()) { + case Bytecodes::_iand: + case Bytecodes::_land: + __ logical_and(left.result(), 
right.result(), x->operand()); break; + case Bytecodes::_ior: + case Bytecodes::_lor: + __ logical_or (left.result(), right.result(), x->operand()); break; + case Bytecodes::_ixor: + case Bytecodes::_lxor: + __ logical_xor(left.result(), right.result(), x->operand()); break; + default: Unimplemented(); + } +} + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + ValueTag tag = x->x()->type()->tag(); + left.load_item(); + right.load_item(); + LIR_Opr reg = rlock_result(x); + + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), right.result(), reg); + } else { + Unimplemented(); + } +} + +void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { + assert(x->number_of_arguments() == 4, "wrong type"); + LIRItem obj (x->argument_at(0), this); // object + LIRItem offset(x->argument_at(1), this); // offset of field + LIRItem cmp (x->argument_at(2), this); // value to compare with field + LIRItem val (x->argument_at(3), this); // replace field with val if matches cmp + + assert(obj.type()->tag() == objectTag, "invalid type"); + + // In 64bit the type can be long, sparc doesn't have this assert + // assert(offset.type()->tag() == intTag, "invalid type"); + + assert(cmp.type()->tag() == type->tag(), "invalid type"); + assert(val.type()->tag() == type->tag(), "invalid type"); + + // get address of field + obj.load_item(); + offset.load_nonconstant(); + if (type == longType) { + // not need if allocator reserves correct pairs + val.load_item_force(FrameMap::long0_opr); + } else { + val.load_item(); + } + cmp.load_item(); + + LIR_Address* a; + if(offset.result()->is_constant()) { + jint c = offset.result()->as_jint(); + a = new LIR_Address(obj.result(), + c, + as_BasicType(type)); + } else { + a = new LIR_Address(obj.result(), + offset.result(), + LIR_Address::times_1, + 0, + as_BasicType(type)); + } + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + if (type == objectType) { // Write-barrier needed for Object fields. + // Do the pre-write barrier, if any. + pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + + LIR_Opr result = rlock_result(x); + + LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience + if (type == objectType) + __ cas_obj(addr, cmp.result(), val.result(), ill, ill, result); + else if (type == intType) + __ cas_int(addr, cmp.result(), val.result(), ill, ill, result); + else if (type == longType) + __ cas_long(addr, cmp.result(), val.result(), FrameMap::long1_opr, ill, + result); + else { + ShouldNotReachHere(); + } + + __ logical_xor(result, LIR_OprFact::intConst(1), result); + + if (type == objectType) { // Write-barrier needed for Object fields. 
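+    // (a successful CAS has just published val into the field, so the card for this
+    //  exact address must be dirtied)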
+ // Seems to be precise + post_barrier(addr, val.result()); + } +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + switch (x->id()) { + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + } + break; + } + case vmIntrinsics::_dlog10: // fall through + case vmIntrinsics::_dlog: // fall through + case vmIntrinsics::_dsin: // fall through + case vmIntrinsics::_dtan: // fall through + case vmIntrinsics::_dcos: // fall through + case vmIntrinsics::_dexp: { + assert(x->number_of_arguments() == 1, "wrong type"); + + address runtime_entry = NULL; + switch (x->id()) { + case vmIntrinsics::_dsin: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case vmIntrinsics::_dcos: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case vmIntrinsics::_dtan: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case vmIntrinsics::_dlog: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case vmIntrinsics::_dlog10: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + break; + case vmIntrinsics::_dexp: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + default: + ShouldNotReachHere(); + } + + LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + case vmIntrinsics::_dpow: { + assert(x->number_of_arguments() == 2, "wrong type"); + address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + } +} + + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call) + + // The java calling convention does not give us enough registers + // so we occupy two more: r4 and r5. The fast path code will be able to + // make use of these registers for performance purpose. 
If going into + // slow path we'll spill extra data to the stack as necessary + + src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); + src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); + dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); + dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); + + length.load_item_force (FrameMap::as_opr(r4)); + LIR_Opr tmp = FrameMap::as_opr(r5); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + assert(UseCRC32Intrinsics, "why are we here?"); + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: { + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + LIR_Opr base_op = buf.result(); + + if (offset) { + LIR_Opr tmp = new_pointer_register(); + __ add(base_op, LIR_OprFact::intConst(offset), tmp); + base_op = tmp; + offset = 0; + } + + LIR_Address* a = new LIR_Address(base_op, + index, + LIR_Address::times_1, + offset, + T_BYTE); + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + len.load_item_force(cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + // insired by sparc port + switch (x->op()) { + case Bytecodes::_d2l: + case Bytecodes::_f2l: + case Bytecodes::_l2d: + case Bytecodes::_l2f: { + address entry; + + switch (x->op()) { + case Bytecodes::_d2l: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::d2l); + break; + case Bytecodes::_f2l: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::f2l); + break; + case Bytecodes::_l2d: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2d); + break; + case Bytecodes::_l2f: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f); + break; + default: + ShouldNotReachHere(); + } + + LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL); + set_result(x, result); + } + break; + + default: + LIRItem value(x->value(), 
this); + value.load_item(); + + if (x->op() == Bytecodes::_f2i || x->op() == Bytecodes::_d2i) { + value.set_destroys_register(); + } + + LIR_Opr input = value.result(); + LIR_Opr result = rlock(x); + + __ convert(x->op(), input, result); + + assert(result->is_virtual(), "result must be virtual register"); + set_result(x, result); + } +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { +#ifndef PRODUCT + if (PrintNotLoaded && !x->klass()->is_loaded()) { + tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); + } +#endif + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::r2_oop_opr, + FrameMap::r5_oop_opr, + FrameMap::r4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item_force(FrameMap::r6_opr); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r2_oop_opr; + LIR_Opr tmp2 = FrameMap::r4_oop_opr; + LIR_Opr tmp3 = FrameMap::r5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r3_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + LIRItem length(x->length(), this); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + CodeEmitInfo* info = state_for(x, x->state()); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r2_oop_opr; + LIR_Opr tmp2 = FrameMap::r4_oop_opr; + LIR_Opr tmp3 = FrameMap::r5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r3_metadata_opr; + + length.load_item_force(FrameMap::r6_opr); + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(dims->length(), NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. 
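+  // (as in do_NewObjectArray, patching_info must capture the state before the
+  //  dimension values are consumed so the bytecode can be re-executed after patching)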
+ CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). + x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + + store_stack_parameter(size->result(), in_ByteSize(i*4)); + } + + LIR_Opr klass_reg = FrameMap::r1_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::r2_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::r3_opr; + __ move(FrameMap::sp_opr, varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = state_for(x); + + CodeStub* stub; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded()) { + tmp3 = new_register(objectType); + } + __ checkcast(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + + // result and test object may not be in same register + LIR_Opr reg = rlock_result(x); + CodeEmitInfo* patching_info = NULL; + if ((!x->klass()->is_loaded() || PatchALot)) { + // must do this before locking the destination register as an oop register + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded()) { + tmp3 = new_register(objectType); + } + __ instanceof(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_If(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + bool is_safepoint = x->is_safepoint(); + + 
If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + xin->load_item(); + + if (yin->is_constant()) { + if (tag == longTag + && Assembler::operand_valid_for_add_sub_immediate(yin->get_jlong_constant())) { + yin->dont_load_item(); + } else if (tag == intTag + && Assembler::operand_valid_for_add_sub_immediate(yin->get_jint_constant())) { + yin->dont_load_item(); + } else if (tag == addressTag + && Assembler::operand_valid_for_add_sub_immediate(yin->get_address_constant())) { + yin->dont_load_item(); + } else if (tag == objectTag && yin->get_jobject_constant()->is_null_object()) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else { + yin->load_item(); + } + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + + __ cmp(lir_cond(cond), left, right); + // Generate branch profiling. Profiling code doesn't kill flags. + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { + __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(rthread); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { + __ move(LIR_OprFact::intConst(block->block_id()), FrameMap::r0_opr); + LIR_OprList* args = new LIR_OprList(1); + args->append(FrameMap::r0_opr); + address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry); + __ call_runtime_leaf(func, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, args); +} + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + if (value->is_double_cpu()) { + __ move(value, FrameMap::long0_opr); + __ volatile_store_mem_reg(FrameMap::long0_opr, address, info); + } else { + __ volatile_store_mem_reg(value, address, info); + } +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + if (result->is_double_cpu()) { + __ volatile_load_mem_reg(address, FrameMap::long0_opr, info); + __ move(FrameMap::long0_opr, result); + } else { + __ volatile_load_mem_reg(address, result, info); + } +} + +void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + __ load(addr, dst); +} + + +void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + if (is_obj) { + // Do the pre-write barrier, if any. 
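+    // (with G1 this records the previous value of the field for SATB marking;
+    //  with the other collectors the pre barrier expands to nothing)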
+ pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(data, addr); + assert(src->is_register(), "must be register"); + // Seems to be a precise address + post_barrier(LIR_OprFact::address(addr), data); + } else { + __ move(data, addr); + } +} + +void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + BasicType type = x->basic_type(); + LIRItem src(x->object(), this); + LIRItem off(x->offset(), this); + LIRItem value(x->value(), this); + + src.load_item(); + off.load_nonconstant(); + if (type == T_LONG && !x->is_add()) { + // not need if allocator reserves correct pairs + value.load_item_force(FrameMap::long1_opr); + } else { + // We can cope with a constant increment in an xadd + if (! (x->is_add() + && value.is_constant() + && can_inline_as_constant(x->value()))) { + value.load_item(); + } + } + + bool is_long = (type == T_LONG); + LIR_Opr dst = is_long ? FrameMap::long0_opr : rlock_result(x, type); + LIR_Opr data = value.result(); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + LIR_Opr offset = off.result(); + + if (data == dst) { + LIR_Opr tmp = new_register(data->type()); + __ move(data, tmp); + data = tmp; + } + + LIR_Address* addr; + if (offset->is_constant()) { + addr = new LIR_Address(src.result(), offset->as_jint(), type); + } else { + addr = new LIR_Address(src.result(), offset, type); + } + + LIR_Opr tmp = new_register(T_INT); + LIR_Opr ptr = LIR_OprFact::illegalOpr; + + if (x->is_add()) { + __ xadd(LIR_OprFact::address(addr), data, dst, tmp); + } else { + if (is_obj) { + // Do the pre-write barrier, if any. + ptr = new_pointer_register(); + __ add(src.result(), off.result(), ptr); + pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + __ xchg(LIR_OprFact::address(addr), data, dst, tmp); + if (is_obj) { + post_barrier(ptr, data); + } + } + + if (is_long) { + dst = rlock_result(x, type); + __ move(FrameMap::long0_opr, dst); + } +} --- /dev/null 2016-08-26 13:07:56.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_LinearScan_aarch32.cpp 2016-08-26 13:07:56.000000000 +0300 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_LinearScan.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on AArch32 +} --- /dev/null 2016-08-26 13:07:58.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_LinearScan_aarch32.hpp 2016-08-26 13:07:58.000000000 +0300 @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#ifndef CPU_AARCH32_VM_C1_LINEARSCAN_AARCH32_HPP +#define CPU_AARCH32_VM_C1_LINEARSCAN_AARCH32_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + return reg_num <= pd_last_cpu_reg || reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + if (type == T_LONG || type == T_DOUBLE) { + return 2; + } + return 1; +} + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + if (type == T_DOUBLE) { + return true; + } + return false; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, + "should call this only for registers"); + // TODO: Remove the following line when support for callee-saved registers + // is added + return true; + if (assigned_reg < pd_first_callee_saved_cpu_reg) { + return true; + } + if (assigned_reg > pd_last_callee_saved_cpu_reg && + assigned_reg < pd_first_callee_saved_fpu_reg) { + return true; + } + if (assigned_reg > pd_last_callee_saved_fpu_reg && + assigned_reg <= pd_last_fpu_reg) { + return true; + } + return false; +} + +// If there are special cases when some particular LIR operations kill some +// specific registers, this behavior should be described here. An example +// can be found in x86 port. 
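+// On AArch32 the case handled here is volatile long/double moves, which go through
+// the fixed register pairs long0/long1 (cf. volatile_field_store/volatile_field_load
+// in the LIR generator), so those pairs are recorded as temps for such ops.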
+inline void LinearScan::pd_add_temps(LIR_Op* op) { + if (op->code() == lir_move) { + LIR_Op1* move_op = op->as_Op1(); + if (move_op->move_kind() == lir_move_volatile) { + bool is_long = move_op->type() == T_LONG; + bool is_double = move_op->type() == T_DOUBLE; + bool is_store = move_op->in_opr()->is_register(); + if (is_double) { + add_temp(reg_num(FrameMap::long0_opr), op->id(), noUse, T_ILLEGAL); + add_temp(reg_numHi(FrameMap::long0_opr), op->id(), noUse, T_ILLEGAL); + } + if (is_store && (is_long || is_double)) { + add_temp(reg_num(FrameMap::long1_opr), op->id(), noUse, T_ILLEGAL); + add_temp(reg_numHi(FrameMap::long1_opr), op->id(), noUse, T_ILLEGAL); + } + } + } +} + +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { + // The default logic is good enough for AArch32. + return false; +} + +#endif // CPU_AARCH32_VM_C1_LINEARSCAN_AARCH32_HPP --- /dev/null 2016-08-26 13:07:59.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_MacroAssembler_aarch32.cpp 2016-08-26 13:07:59.000000000 +0300 @@ -0,0 +1,476 @@ +/* + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#include "precompiled.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" +#include "gc_interface/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, + Register result) +{ + Label done; + if (is_float) { + vcmp_f32(f0, f1); + } else { + vcmp_f64(f0, f1); + } + + get_fpsr(); + + mov(result, 0); + if (unordered_result < 0) { + // we want -1 for unordered or less than, 0 for equal and 1 for + // greater than. 
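+    // (after get_fpsr() the APSR holds the VFP compare flags: an unordered compare
+    //  sets C and V, so LT (N != V) below includes the unordered case while LO
+    //  (C clear) in the other branch excludes it)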
+ mov(result, 1, NE); // Not equal or unordered + neg(result, result, LT); // Less than or unordered + } else { + // we want -1 for less than, 0 for equal and 1 for unordered or + // greater than. + mov(result, 1, NE); // Not equal or unordered + neg(result, result, LO); // Less than + } +} + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done, fail; + int null_check_offset = -1; + + verify_oop(obj); + + // save object being locked into the BasicObjectLock + str(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } else { + null_check_offset = offset(); + } + + // Load object header + ldr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + orr(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + str(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + lea(rscratch2, Address(obj, hdr_offset)); + cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + mov(rscratch1, sp); + sub(hdr, hdr, rscratch1); + mov(rscratch2, aligned_mask - os::vm_page_size()); + ands(hdr, hdr, rscratch2); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + str(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + cbnz(hdr, slow_case); + // done + bind(done); + if (PrintBiasedLockingStatistics) { + lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + addmw(Address(rscratch2, 0), 1, rscratch1); + } + return null_check_offset; +} + + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + + if (UseBiasedLocking) { + // load object + ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + ldr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + cbz(hdr, done); + if (!UseBiasedLocking) { + // load object + 
ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(rscratch1, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); + } + // done + bind(done); +} + + +// Defines obj, preserves var_size_in_bytes +void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); + incr_allocated_bytes(noreg, var_size_in_bytes, con_size_in_bytes, t1); + } +} + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { + assert_different_registers(obj, klass, len); + if (UseBiasedLocking && !len->is_valid()) { + assert_different_registers(obj, klass, len, t1, t2); + ldr(t1, Address(klass, Klass::prototype_header_offset())); + } else { + // This assumes that all prototype bits fit in an int32_t + mov(t1, (int32_t)(intptr_t)markOopDesc::prototype()); + } + str(t1, Address(obj, oopDesc::mark_offset_in_bytes())); + str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + + if (len->is_valid()) { + str(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } +} + +// Zero words; len is in bytes +// Destroys all registers except addr +// len must be a nonzero multiple of wordSize +void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) { + assert_different_registers(addr, len, t1, rscratch1, rscratch2); + +#ifdef ASSERT + { Label L; + tst(len, BytesPerWord - 1); + b(L, Assembler::EQ); + stop("len is not a multiple of BytesPerWord"); + bind(L); + } +#endif + +#ifndef PRODUCT + block_comment("zero memory"); +#endif + + Label loop; + Label entry; + +// Algorithm: +// +// scratch1 = cnt & 7; +// cnt -= scratch1; +// p += scratch1; +// switch (scratch1) { +// do { +// cnt -= 8; +// p[-8] = 0; +// case 7: +// p[-7] = 0; +// case 6: +// p[-6] = 0; +// // ... 
+// case 1: +// p[-1] = 0; +// case 0: +// p += 8; +// } while (cnt); +// } + + const int unroll = 8; // Number of str instructions we'll unroll + + lsr(len, len, LogBytesPerWord); + andr(rscratch1, len, unroll - 1); // tmp1 = cnt % unroll + sub(len, len, rscratch1); // cnt -= unroll + // t1 always points to the end of the region we're about to zero + add(t1, addr, rscratch1, lsl(LogBytesPerWord)); + adr(rscratch2, entry); + sub(rscratch2, rscratch2, rscratch1, lsl(2)); + mov(rscratch1, 0); + b(rscratch2); + bind(loop); + sub(len, len, unroll); + for (int i = -unroll; i < 0; i++) + str(rscratch1, Address(t1, i * wordSize)); + bind(entry); + add(t1, t1, unroll * wordSize); + cbnz(len, loop); +} + +// preserves obj, destroys len_in_bytes +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) { + Label done; + assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different"); + assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord"); + Register index = len_in_bytes; + // index is positive and ptr sized + subs(index, index, hdr_size_in_bytes); + b(done, Assembler::EQ); + // note: for the remaining code to work, index must be a multiple of BytesPerWord +#ifdef ASSERT + { Label L; + tst(index, BytesPerWord - 1); + b(L, Assembler::EQ); + stop("index is not a multiple of BytesPerWord"); + bind(L); + } +#endif + + // Preserve obj + if (hdr_size_in_bytes) + add(obj, obj, hdr_size_in_bytes); + zero_memory(obj, index, t1); + if (hdr_size_in_bytes) + sub(obj, obj, hdr_size_in_bytes); + + // done + bind(done); +} + + +void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) { + assert_different_registers(obj, t1, t2); // XXX really? + assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + + try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); + + initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2); +} + +// This method clobbers t1, t2, and rscratch1 registers. 
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, Register t2) { + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, + "con_size_in_bytes is not multiple of alignment"); + + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, t1, t2); + + // Null out rest of allocated space + const Register index = t2; + const int threshold = 8 * BytesPerWord; + if (var_size_in_bytes != noreg) { + mov(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, t1); + } else if (con_size_in_bytes <= threshold) { + // Emit required number of str instructions (unroll loop completely) + mov(t1, 0); + for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord) { + str(t1, Address(obj, i)); + } + } else if (con_size_in_bytes > hdr_size_in_bytes) { + // Use loop to null out fields + int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord; + mov(t1, 0); + + const int unroll = 4; // Number of str instructions we'll unroll + mov(index, words / unroll); + int remainder = words % unroll; + lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord)); + + Label entry_point, loop; + b(entry_point); + bind(loop); + sub(index, index, 1); + for (int i = -unroll; i < 0; i++) { + if (-i == remainder) { + bind(entry_point); + } + str(t1, Address(rscratch1, i * BytesPerWord)); + } + if (remainder == 0) { + bind(entry_point); + } + add(rscratch1, rscratch1, unroll * BytesPerWord); + cbnz(index, loop); + } + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == r0, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for( + Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) { + assert_different_registers(obj, len, t1, t2, klass); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); + + // check for negative or excessive length + mov(rscratch1, (int32_t)max_array_allocation_length); + cmp(len, rscratch1); + b(slow_case, Assembler::HS); + + const Register arr_size = t2; // okay to be the same + // align object end + mov(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + add(arr_size, arr_size, len, Assembler::lsl(f)); + mov(t1, ~MinObjAlignmentInBytesMask); + andr(arr_size, arr_size, t1); + + try_allocate(obj, arr_size, 0, t1, t2, slow_case); + + initialize_header(obj, klass, len, t1, t2); + + // clear rest of allocated space + const Register len_zero = len; + initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == r0, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + + +void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { + verify_oop(receiver); + // explicit NULL check not needed since load from [klass_offset] causes a trap + // check against inline cache + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); + + cmp_klass(receiver, iCache, rscratch1); +} + +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, + int bang_size_in_bytes) { + 
assert(bang_size_in_bytes >= frame_size_in_bytes, + "stack bang size incorrect"); + + // If we have to make this method not-entrant, we'll overwrite its first + // instruction with a jump. For this action to be legal we must ensure that + // this first instruction is a B, BL, NOP, BKPT, or SVC. Make it a NOP + nop(); + + // Make sure there is enough stack space for this method's activation + generate_stack_overflow_check(bang_size_in_bytes); + + // Push lr, rfp, and optionally update rfp. rfp points to the first stack + // word used by the new frame. + stmdb(sp, RegSet::of(rfp, lr).bits()); + if (PreserveFramePointer) { + add(rfp, sp, BytesPerWord); + } + + // Create frame. frame_size_in_bytes always comes from + // LIR_Assembler::initial_frame_size_in_bytes() method, and it already + // takes into account two stack words spent on saving lr and rfp. + decrement(sp, frame_size_in_bytes); +} + +void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { + // Remove frame. frame_size_in_bytes always comes from + // LIR_Assembler::initial_frame_size_in_bytes() method, and it already + // takes into account two stack words spent on saving lr and rfp. + increment(sp, frame_size_in_bytes); + + // Pop rfp and lr + ldmia(sp, RegSet::of(rfp, lr).bits()); +} + +void C1_MacroAssembler::verified_entry() { +} + +#ifndef PRODUCT + +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(sp, stack_offset), "oop"); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + cbnz(r, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_r0, bool inv_r2, bool inv_r3) { +#ifdef ASSERT + static int nn; + if (inv_r0) mov(r0, 0xDEAD); + if (inv_r2) mov(r2, nn++); + if (inv_r3) mov(r3, 0xDEAD); +#endif +} +#endif // ifndef PRODUCT --- /dev/null 2016-08-26 13:08:01.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_MacroAssembler_aarch32.hpp 2016-08-26 13:08:01.000000000 +0300 @@ -0,0 +1,113 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. 
+// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#ifndef CPU_AARCH32_VM_C1_MACROASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_VM_C1_MACROASSEMBLER_AARCH32_HPP + +// C1_MacroAssembler contains high-level macros for C1 + + private: + int _rsp_offset; // track rsp changes + // initialization + void pd_init() { _rsp_offset = 0; } + +void zero_memory(Register addr, Register len, Register t1); + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, + Register result); + + // locking + // hdr : must be r0, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved + // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must be r0 & must point to the displaced header location, contents destroyed + void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2 // temp register + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // hdr_size correctly and storing the array length afterwards) + // obj : will contain pointer to allocated object + // t1, t2 : scratch registers - contents destroyed + // header_size: size of object header in words + // object_size: total size of object in words + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x00FFFFFF + }; + + // allocation of arrays + // obj : will contain pointer to allocated object + // len : array length in number of elements + // t : scratch register - contents destroyed + // header_size: size of object header in words + // f : element scale factor + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, int f, Register klass, Label& slow_case); + + int rsp_offset() const { return _rsp_offset; } + void set_rsp_offset(int n) { _rsp_offset = n; } + + void 
invalidate_registers(bool inv_r0, bool inv_r2, bool inv_r3) PRODUCT_RETURN; + +#endif // CPU_AARCH32_VM_C1_MACROASSEMBLER_AARCH32_HPP --- /dev/null 2016-08-26 13:08:03.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_Runtime1_aarch32.cpp 2016-08-26 13:08:02.000000000 +0300 @@ -0,0 +1,1316 @@ +/* + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. 
+ +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_aarch32.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_aarch32.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#endif + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { + // setup registers + assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different"); + assert(oop_result1 != rthread && metadata_result != rthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + + mov(c_rarg0, rthread); + set_num_rt_args(0); // Nothing on stack + + Label retaddr; + set_last_Java_frame(sp, rfp, retaddr, rscratch1); + + // do the call + lea(rscratch1, RuntimeAddress(entry)); + bl(rscratch1); + bind(retaddr); + int call_offset = offset(); + // verify callee-saved register +#ifdef ASSERT + push(r0, sp); + { Label L; + get_thread(r0); + cmp(rthread, r0); + b(L, Assembler::EQ); + stop("StubAssembler::call_RT: rthread not callee saved?"); + bind(L); + } + pop(r0, sp); +#endif + reset_last_Java_frame(true, true); + maybe_isb(); + + // check for pending exceptions + { Label L; + // check for pending exceptions (java_thread is set upon return) + ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + cbz(rscratch1, L); + mov(rscratch1, 0); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared + if (oop_result1->is_valid()) { + str(rscratch1, Address(rthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { + str(rscratch1, Address(rthread, JavaThread::vm_result_2_offset())); + } + if (frame_size() == no_frame_size) { + leave(); + far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + } else if (_stub_id == Runtime1::forward_exception_id) { + should_not_reach_here(); + } else { + far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + } + bind(L); + } + // get oop results if there are any and reset the values in the thread + if (oop_result1->is_valid()) { + get_vm_result(oop_result1, rthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, rthread); + } + return call_offset; +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { + mov(c_rarg1, arg1); + return call_RT(oop_result1, metadata_result, entry, 1); +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { + mov(rscratch1, arg1); + mov(arg1, arg2); + mov(arg2, rscratch1); + } else { + mov(c_rarg2, arg2); + mov(c_rarg1, arg1); + } + } else { + mov(c_rarg1, arg1); + mov(c_rarg2, arg2); + } + return call_RT(oop_result1, metadata_result, entry, 2); +} + + +int StubAssembler::call_RT(Register oop_result1, Register 
metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + // if there is any conflict use the stack + if (arg1 == c_rarg2 || arg1 == c_rarg3 || + arg2 == c_rarg1 || arg2 == c_rarg3 || + arg3 == c_rarg1 || arg3 == c_rarg2) { + push(arg2); + push(arg3); + push(arg1); + pop(c_rarg1); + pop(c_rarg3); + pop(c_rarg2); + } else { + mov(c_rarg1, arg1); + mov(c_rarg2, arg2); + mov(c_rarg3, arg3); + } + return call_RT(oop_result1, metadata_result, entry, 3); +} + +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; + + public: + StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); +};; + + +#define __ _sasm-> + +StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; + __ set_info(name, must_gc_arguments); + __ enter(); +} + +// load parameters that were stored with LIR_Assembler::store_parameter +// Note: offsets for store_parameter and load_argument must match +void StubFrame::load_argument(int offset_in_words, Register reg) { + // - 1: link + // fp 0: return address + // + 1: argument with offset 0 + // + 2: argument with offset 1 + // + 3: ... + + __ ldr(reg, Address(rfp, (offset_in_words + 1) * BytesPerWord)); +} + + +StubFrame::~StubFrame() { + __ leave(); + __ ret(lr); +} + +#undef __ + + +// Implementation of Runtime1 + +#define __ sasm-> + +const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; + +// Stack layout for saving/restoring all the registers needed during a runtime +// call (this includes deoptimization) +// Note: note that users of this frame may well have arguments to some runtime +// while these values are on the stack. These positions neglect those arguments +// but the code in save_live_registers will take the argument count into +// account. +// + +enum reg_save_layout { + reg_save_s0, + reg_save_s31 = reg_save_s0 + 31, + reg_save_pad, // to align to doubleword to simplify conformance to APCS + reg_save_r0, + reg_save_r1, + reg_save_r2, + reg_save_r3, + reg_save_r4, + reg_save_r5, + reg_save_r6, + reg_save_r7, + reg_save_r8, + reg_save_r9, + reg_save_r10, + reg_save_r11, + reg_save_r12, + // pushed by enter + rfp_off, + return_off, + reg_save_frame_size +}; + +// Save off registers which might be killed by calls into the runtime. +// Tries to smart of about FP registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. +// In all other cases it should be sufficient to simply save their +// current value. 
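For orientation, the save area described by reg_save_layout works out as follows, assuming BytesPerWord == 4 and FrameMap::nof_fpu_regs == 32 (i.e. s0..s31); offsets are from sp once save_live_registers() below has completed.

    // sp +   0 .. +127 : s0..s31        (32 words, stored by vstmdb_f64 or reserved)
    // sp + 128         : alignment pad  (reg_save_pad, keeps the frame doubleword aligned)
    // sp + 132 .. +180 : r0..r12        (13 words, pushed by save_live_registers)
    // sp + 184         : saved rfp      (pushed by enter())
    // sp + 188         : return address
    //
    // reg_save_frame_size == 48 words == 192 bytes, which is the frame size that
    // generate_oop_map() reports to the GC.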
+ +static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; +static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; +static int reg_save_size_in_words; +static int frame_size_in_bytes = -1; + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { + int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r0), r0->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r1), r1->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r2), r2->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r3), r3->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r4), r4->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r5), r5->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r6), r6->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r7), r7->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r8), r8->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r9), r9->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r10), r10->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r11), r11->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r12), r12->as_VMReg()); + + for (int i = 0; i < 32; ++i) { + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_s0 + i), as_FloatRegister(i)->as_VMReg()); + } + + return oop_map; +} + +static OopMap* save_live_registers(StubAssembler* sasm, + bool save_fpu_registers = true) { + __ block_comment("save_live_registers"); + + __ push(RegSet::range(r0, r12), sp); // integer registers except lr & sp + __ sub(sp, sp, 4); // align to 8 bytes + + if (save_fpu_registers) { + __ vstmdb_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1); + } else { + __ sub(sp, sp, FrameMap::nof_fpu_regs * 4); + } + + return generate_oop_map(sasm, save_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + __ vldmia_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1); + } else { + __ add(sp, sp, FrameMap::nof_fpu_regs * 4); + } + + __ add(sp, sp, 4); + __ pop(RegSet::range(r0, r12), sp); +} + +static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true) { + + if (restore_fpu_registers) { + __ vldmia_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1); + } else { + __ add(sp, sp, FrameMap::nof_fpu_regs * 4); + } + + __ add(sp, sp, 8); + __ pop(RegSet::range(r1, r12), sp); +} + +void Runtime1::initialize_pd() { +} + +// target: the entry point of the method that creates and posts the exception oop +// has_argument: true if the exception needs an argument (passed in rscratch1) + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + int call_offset; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + call_offset = __ call_RT(noreg, noreg, target, rscratch1); + } + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + __ should_not_reach_here(); + return 
oop_maps; +} + + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters + const Register exception_oop = r0; + const Register exception_pc = r3; + // other registers used in this stub + + // Save registers, if required. + OopMapSet* oop_maps = new OopMapSet(); + OopMap* oop_map = NULL; + switch (id) { + case forward_exception_id: + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the callers + // exception handler. + oop_map = generate_oop_map(sasm, 1 /*thread*/); + __ mov(rscratch1, 0); + + // load and clear pending exception oop into r0 + __ ldr(exception_oop, Address(rthread, Thread::pending_exception_offset())); + __ str(rscratch1, Address(rthread, Thread::pending_exception_offset())); + + // load issuing PC (the return address for this stub) into r3 + __ ldr(exception_pc, Address(rfp)); + + // make sure that the vm_results are cleared (may be unnecessary) + __ str(rscratch1, Address(rthread, JavaThread::vm_result_offset())); + __ str(rscratch1, Address(rthread, JavaThread::vm_result_2_offset())); + break; + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: { + // At this point all registers except exception oop (r0) and + // exception pc (lr) are dead. + const int frame_size = 2 /*fp, return address*/; + assert(frame_size*wordSize % StackAlignmentInBytes == 0, "must be"); + oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); + sasm->set_frame_size(frame_size); + break; + } + default: + __ should_not_reach_here(); + break; + } + + // verify that only r0 and r3 are valid at this time + __ invalidate_registers(false, true, false); + // verify that r0 contains a valid exception + __ verify_not_null_oop(exception_oop); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are + // empty before writing to them + Label oop_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop already set"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc already set"); + __ bind(pc_empty); +#endif + + // save exception oop and issuing pc into JavaThread + // (exception handler will load it from here) + __ str(exception_oop, Address(rthread, JavaThread::exception_oop_offset())); + __ str(exception_pc, Address(rthread, JavaThread::exception_pc_offset())); + + // patch throwing pc into return address (has bci & oop map) + __ str(exception_pc, Address(rfp)); + + // compute the exception handler. + // the exception oop and the throwing pc are read from the fields in JavaThread + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + oop_maps->add_gc_map(call_offset, oop_map); + + // r0: handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. 
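Read as an outline, the rest of this stub does the following (a paraphrase of the surrounding code, not new behaviour):

    // [rfp] = exception_pc;                  // throwing pc patched into the return slot (above)
    // r0    = exception_handler_for_pc(thread);
    // [rfp] = r0;                            // return slot now points at the handler (or deopt blob)
    // restore the saved registers (or just leave(), for handle_exception_from_callee_id)
    // and return -- the "return" transfers control straight into the exception handler.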
+ + // only r0 is valid at this time, all other registers have been destroyed by the runtime call + __ invalidate_registers(false, true, true); + + // patch the return address, this stub will directly return to the exception handler + __ str(r0, Address(rfp)); + + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: + // Pop the return address. + __ leave(); + __ ret(lr); // jump to exception handler + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + + +void Runtime1::generate_unwind_exception(StubAssembler *sasm) { + // incoming parameters + const Register exception_oop = r0; + // other registers used in this stub + const Register exception_pc = r3; + const Register handler_addr = r1; + + // verify that only r0, is valid at this time + __ invalidate_registers(false, true, true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Save our return address because + // exception_handler_for_return_address will destroy it. We also + // save exception_oop + __ push(exception_oop); + __ push(lr); + + // search the exception handler address of the caller (using the return address) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, lr); + // r0: exception handler address of the caller + + // Only R0 is valid at this time; all other registers have been + // destroyed by the call. + __ invalidate_registers(false, true, true); + + // move result of call into correct register + __ mov(handler_addr, r0); + + // get throwing pc (= return address). + // lr has been destroyed by the call + __ pop(lr); + __ pop(exception_oop); + __ mov(r3, lr); + + __ verify_not_null_oop(exception_oop); + + // continue at exception handler (return address removed) + // note: do *not* remove arguments when unwinding the + // activation since the caller assumes having + // all arguments on the stack when entering the + // runtime to determine the exception handler + // (GC happens at call site with arguments!) + // r0: exception oop + // r3: throwing pc + // r1: exception handler + __ b(handler_addr); +} + + + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // use the maximum number of runtime-arguments here because it is difficult to + // distinguish each RT-Call. + // Note: This number affects also the RT-Call in generate_handle_exception because + // the oop-map is shared for all calls. 
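The code generated below amounts to the following contract between this stub and the patching runtime entry, sketched in pseudo-C (target is one of access_field_patching / move_klass_patching / move_mirror_patching / move_appendix_patching):

    // bool deoptimized = target(thread);
    // if (thread->has_pending_exception()) {
    //   if (!deoptimized) goto Runtime1::forward_exception_id;
    //   // otherwise move the exception into the JavaThread fields and let the
    //   // deopt blob pick it up from there:
    //   goto deopt_blob->unpack_with_exception_in_tls();
    // }
    // if (deoptimized) goto deopt_blob->unpack_with_reexecution();
    // return;   // caller simply re-executes the now-patched code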
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + OopMap* oop_map = save_live_registers(sasm); + + __ mov(c_rarg0, rthread); + Label retaddr; + __ set_last_Java_frame(sp, rfp, retaddr, rscratch1); + // do the call + __ lea(rscratch1, RuntimeAddress(target)); + __ bl(rscratch1); + __ bind(retaddr); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(__ offset(), oop_map); + // verify callee-saved register +#ifdef ASSERT + { Label L; + __ get_thread(rscratch1); + __ cmp(rthread, rscratch1); + __ b(L, Assembler::EQ); + __ stop("StubAssembler::call_RT: rthread not callee saved?"); + __ bind(L); + } +#endif + __ reset_last_Java_frame(true, false); + __ maybe_isb(); + + // check for pending exceptions + { Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + // exception pending => remove activation and forward to exception handler + + { Label L1; + __ cbnz(r0, L1); // have we deoptimized? + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + __ bind(L1); + } + + // the deopt blob expects exceptions in the special fields of + // JavaThread, so copy and clear pending exception. + + // load and clear pending exception + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, Thread::pending_exception_offset())); + + // check that there is really a valid exception + __ verify_not_null_oop(r0); + + // load throwing pc: this is the return address of the stub + __ ldr(r3, Address(rfp)); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // store exception oop and throwing pc to JavaThread + __ str(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ str(r3, Address(rthread, JavaThread::exception_pc_offset())); + + restore_live_registers(sasm); + + __ leave(); + + // Forward the exception directly to deopt blob. We can blow no + // registers and must leave throwing pc on the stack. A patch may + // have values live in registers so the entry point with the + // exception in tls. + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); + + __ bind(L); + } + + + // Runtime will return true if the nmethod has been deoptimized during + // the patching process. In that case we must do a deopt reexecute instead. + + Label reexecuteEntry, cont; + + __ cbz(r0, cont); // have we deoptimized? + + // Will reexecute. 
Proper return address is already on the stack we just restore + // registers, pop all of our frame but the return address and jump to the deopt blob + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + + __ bind(cont); + restore_live_registers(sasm); + __ leave(); + __ ret(lr); + + return oop_maps; +} + + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + const Register exception_oop = r0; + const Register exception_pc = r3; + + // for better readability + const bool must_gc_arguments = true; + const bool dont_gc_arguments = false; + + // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu + bool save_fpu_registers = true; + + // stub code & info for the different stubs + OopMapSet* oop_maps = NULL; + OopMap* oop_map = NULL; + switch (id) { + { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + __ leave(); + __ ret(lr); + } + break; + + case throw_div0_exception_id: + { StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = r3; // Incoming + Register obj = r0; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + UseTLAB && FastTLABRefill) { + Label slow_path; + Register obj_size = r2; + Register t1 = r5; + Register t2 = r4; + assert_different_registers(klass, obj, obj_size, t1, t2); + + __ push(t1); + __ push(r5); + + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ ldrb(rscratch1, Address(klass, InstanceKlass::init_state_offset())); + __ cmp(rscratch1, InstanceKlass::fully_initialized); + __ b(slow_path, Assembler::NE); + } + +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ ldr(obj_size, Address(klass, Klass::layout_helper_offset())); + __ cmp(obj_size, 0u); + __ b(not_ok, Assembler::LE); // Make sure it's an instance (layout helper is positive) + __ tst(obj_size, Klass::_lh_instance_slow_path_bit); + __ b(ok, Assembler::EQ); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // if we got here then the TLAB allocation failed, so try + // refilling the TLAB or allocating directly from eden. 
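For instance klasses the layout helper is simply the object size in bytes (the _lh_instance_slow_path_bit case never reaches this fast path, as the assert above checks), so both the retry_tlab and try_eden paths below boil down to this sketch:

    // obj_size = klass->layout_helper();          // instance size in bytes, > 0
    // obj      = tlab_allocate(obj_size);         // or eden_allocate + incr_allocated_bytes
    // initialize_object(obj, klass, obj_size);    // header plus zeroed body
    // return obj;                                 // result in r0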
+ Label retry_tlab, try_eden; + __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy r3 (klass), returns r5 + + __ bind(retry_tlab); + + // get the instance size (size is postive so movl is fine for 64bit) + __ ldr(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path); + + __ initialize_object(obj, klass, obj_size, 0, t1, t2); + __ verify_oop(obj); + __ pop(r5); + __ pop(t1); + __ ret(lr); + + __ bind(try_eden); + // get the instance size (size is postive so movl is fine for 64bit) + __ ldr(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ eden_allocate(obj, obj_size, 0, t1, slow_path); + __ incr_allocated_bytes(rthread, obj_size, 0, rscratch1); + + __ initialize_object(obj, klass, obj_size, 0, t1, t2); + __ verify_oop(obj); + __ pop(r5); + __ pop(t1); + __ ret(lr); + + __ bind(slow_path); + __ pop(r5); + __ pop(t1); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + __ verify_oop(obj); + __ leave(); + __ ret(lr); + + // r0,: new instance + } + + break; + + case counter_overflow_id: + { + Register bci = r0, method = r1; + __ enter(); + OopMap* map = save_live_registers(sasm); + // Retrieve bci + __ ldr(bci, Address(rfp, 2*BytesPerWord)); + // And a pointer to the Method* + __ ldr(method, Address(rfp, 3*BytesPerWord)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + __ ret(lr); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + Register length = r6; // Incoming + Register klass = r3; // Incoming + Register obj = r0; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register t0 = obj; + __ ldr(t0, Address(klass, Klass::layout_helper_offset())); + __ asr(t0, t0, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) + ? Klass::_lh_array_tag_type_value + : Klass::_lh_array_tag_obj_value); + __ mov(rscratch1, tag); + __ cmp(t0, rscratch1); + __ b(ok, Assembler::EQ); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + if (UseTLAB && FastTLABRefill) { + Register arr_size = r4; + Register t1 = r2; + Register t2 = r5; + Label slow_path; + assert_different_registers(length, klass, obj, arr_size, t1, t2); + + // check that array length is small enough for fast path. + __ mov(rscratch1, C1_MacroAssembler::max_array_allocation_length); + __ cmp(length, rscratch1); + __ b(slow_path, Assembler::HI); + + // if we got here then the TLAB allocation failed, so try + // refilling the TLAB or allocating directly from eden. 
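Both array allocation paths below decode Klass::layout_helper the same way; in plain C the size computation is roughly:

    // log2_esize = layout_helper & 0x1F;                        // element size shift
    // hdr_size   = (layout_helper >> _lh_header_size_shift)
    //              & _lh_header_size_mask;                      // header size in bytes
    // arr_size   = (hdr_size + (length << log2_esize)
    //               + MinObjAlignmentInBytesMask) & ~MinObjAlignmentInBytesMask;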
+ Label retry_tlab, try_eden; + const Register thread = + __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves r6 & r3, returns rthread + + __ bind(retry_tlab); + + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + // since size is positive ldrw does right thing on 64bit + __ ldr(t1, Address(klass, Klass::layout_helper_offset())); + __ andr(rscratch1, t1, 0x1f); + __ lsl(arr_size, length, rscratch1); + __ extract_bits(t1, t1, Klass::_lh_header_size_shift, + exact_log2(Klass::_lh_header_size_mask + 1)); + __ add(arr_size, arr_size, t1); + __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ mov(rscratch1, ~MinObjAlignmentInBytesMask); + __ andr(arr_size, arr_size, rscratch1); + + __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size + + __ initialize_header(obj, klass, length, t1, t2); + // Assume Little-Endian + __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andr(t1, t1, Klass::_lh_header_size_mask); + __ sub(arr_size, arr_size, t1); // body length + __ add(t1, t1, obj); // body start + __ initialize_body(t1, arr_size, 0, t2); + __ verify_oop(obj); + + __ ret(lr); + + __ bind(try_eden); + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + // since size is positive ldrw does right thing on 64bit + __ ldr(t1, Address(klass, Klass::layout_helper_offset())); + __ andr(rscratch1, t1, 0x1f); + __ lsl(arr_size, length, rscratch1); + __ extract_bits(t1, t1, Klass::_lh_header_size_shift, + exact_log2(Klass::_lh_header_size_mask + 1)); + __ add(arr_size, arr_size, t1); + __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ mov(rscratch1, ~MinObjAlignmentInBytesMask); + __ andr(arr_size, arr_size, rscratch1); + + __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size + __ incr_allocated_bytes(thread, arr_size, 0, rscratch1); + + __ initialize_header(obj, klass, length, t1, t2); + // Assume Little-Endian + __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andr(t1, t1, Klass::_lh_header_size_mask); + __ sub(arr_size, arr_size, t1); // body length + __ add(t1, t1, obj); // body start + __ initialize_body(t1, arr_size, 0, t2); + __ verify_oop(obj); + + __ ret(lr); + + __ bind(slow_path); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + __ verify_oop(obj); + __ leave(); + __ ret(lr); + + // r0: new array + } + break; + + case new_multi_array_id: + { StubFrame f(sasm, "new_multi_array", dont_gc_arguments); + // r1: klass + // r2: rank + // r3: address of 1st dimension + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(r0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), r1, r2, r3); + + oop_maps = new OopMapSet(); + 
oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + // r0,: new multi array + __ verify_oop(r0); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // This is called via call_runtime so the arguments + // will be place in C abi locations + + __ verify_oop(c_rarg0); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = r5; + __ load_klass(t, r0); + __ ldr(t, Address(t, Klass::access_flags_offset())); + __ tst(t, JVM_ACC_HAS_FINALIZER); + __ b(register_finalizer, Assembler::NE); + __ ret(lr); + + __ bind(register_finalizer); + __ enter(); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), r0); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ leave(); + __ ret(lr); + } + break; + + case throw_class_cast_exception_id: + { StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // Typical calling sequence: + // __ push(klass_RInfo); // object klass or other subclass + // __ push(sup_k_RInfo); // array element klass or other superclass + // __ bl(slow_subtype_check); + // Note that the subclass is pushed first, and is therefore deepest. 
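Continuing that calling sequence: because result_off aliases sup_k_off in the layout enum that follows, the caller reads the answer back from the slot it pushed the superclass into and then drops both arguments. An illustrative continuation (not copied from a real call site) would be:

    // __ bl(slow_subtype_check);
    // __ ldr(result, Address(sp, 0));   // 1 = is a subtype, 0 = miss
    // __ add(sp, sp, 2 * wordSize);     // pop sup_k_RInfo and klass_RInfo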
+ enum layout { + r0_off, + r2_off, + r4_off, + r5_off, + sup_k_off, + klass_off, + framesize, + result_off = sup_k_off + }; + + __ set_info("slow_subtype_check", dont_gc_arguments); + __ push(RegSet::of(r0, r2, r4, r5), sp); + + // This is called by pushing args and not with C abi + __ ldr(r4, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass + __ ldr(r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass + + + Label miss; + __ check_klass_subtype_slow_path(r4, r0, r2, r5, NULL, &miss); + + // fallthrough on success: + __ mov(rscratch1, 1); + __ str(rscratch1, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop(RegSet::of(r0, r2, r4, r5), sp); + __ ret(lr); + + __ bind(miss); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop(RegSet::of(r0, r2, r4, r5), sp); + __ ret(lr); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + StubFrame f(sasm, "monitorenter", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(1, r0); // r0,: object + f.load_argument(0, r1); // r1,: lock address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), r0, r1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + StubFrame f(sasm, "monitorexit", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(0, r0); // r0,: lock address + + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), r0); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + case throw_range_check_failed_id: + { StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case unwind_exception_id: + { __ set_info("unwind_exception", dont_gc_arguments); + // note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function only. 
+ generate_unwind_exception(sasm); + } + break; + + case access_field_patching_id: + { StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { StubFrame f(sasm, "handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case throw_index_exception_id: + { StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + +#if INCLUDE_ALL_GCS + +// Registers to be saved around calls to g1_wb_pre or g1_wb_post +#define G1_SAVE_REGS (RegSet::range(r0, r12) - RegSet::of(rscratch1, rscratch2)) + + case g1_pre_barrier_slow_id: + { + StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ mov(r0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); + __ should_not_reach_here(); + break; + } + + const Register pre_val = r0; + const Register thread = rthread; + const Register tmp = rscratch1; + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + Label done; + Label runtime; + + // Can we store original value in the thread's buffer? 
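The fast path that follows is the usual SATB enqueue; roughly equivalent pseudo-C (field names are illustrative, the real code goes through JavaThread::satb_mark_queue_offset() and the PtrQueue byte offsets, with the index counting down in bytes):

    // intptr_t index = thread->satb_queue_index;
    // if (index == 0) goto runtime;                   // buffer full -> g1_wb_pre(pre_val, thread)
    // index -= wordSize;
    // thread->satb_queue_index = index;
    // *(thread->satb_queue_buf + index) = pre_val;    // record the previous value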
+ __ ldr(tmp, queue_index); + __ cbz(tmp, runtime); + + __ sub(tmp, tmp, wordSize); + __ str(tmp, queue_index); + __ ldr(rscratch2, buffer); + __ add(tmp, tmp, rscratch2); + f.load_argument(0, rscratch2); + __ str(rscratch2, Address(tmp, 0)); + __ b(done); + + __ bind(runtime); + __ push(G1_SAVE_REGS, sp); + f.load_argument(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + __ pop(G1_SAVE_REGS, sp); + __ bind(done); + } + break; + case g1_post_barrier_slow_id: + { + StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); + + // arg0: store_address + Address store_addr(rfp, 2*BytesPerWord); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + + const Register thread = rthread; + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + const Register card_addr = rscratch2; + ExternalAddress cardtable((address) ct->byte_map_base); + + f.load_argument(0, card_addr); + __ lsr(card_addr, card_addr, CardTableModRefBS::card_shift); + __ mov(rscratch1, cardtable); + __ add(card_addr, card_addr, rscratch1); + __ ldrb(rscratch1, Address(card_addr)); + __ cmp(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val()); + __ b(done, Assembler::EQ); + + assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0"); + + __ membar(Assembler::StoreLoad); + __ ldrb(rscratch1, Address(card_addr)); + __ cbz(rscratch1, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
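Putting the pieces of this stub together, the card handling is essentially the following (pseudo-C; dirty_card_val() is 0, as asserted above):

    // jbyte* card = byte_map_base + (store_addr >> card_shift);
    // if (*card == g1_young_card_val) return;    // young regions need no post barrier
    // StoreLoad fence;                           // order the field store against the re-read
    // if (*card == 0 /* already dirty */) return;
    // *card = 0;                                 // dirty it (the strb below) and enqueue the card
    //                                            // in the thread's dirty-card queue, falling back
    //                                            // to g1_wb_post(card, thread) when the queue is full.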
+ __ mov(rscratch1, 0); + __ strb(rscratch1, Address(card_addr)); + + __ ldr(rscratch1, queue_index); + __ cbz(rscratch1, runtime); + __ sub(rscratch1, rscratch1, wordSize); + __ str(rscratch1, queue_index); + + const Register buffer_addr = r0; + + __ push(RegSet::of(r0, r1), sp); + __ ldr(buffer_addr, buffer); + __ str(card_addr, Address(buffer_addr, rscratch1)); + __ pop(RegSet::of(r0, r1), sp); + __ b(done); + + __ bind(runtime); + __ push(G1_SAVE_REGS, sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + __ pop(G1_SAVE_REGS, sp); + __ bind(done); + + } + break; +#endif + + case predicate_failed_trap_id: + { + StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + + default: + { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ mov(r0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); + __ should_not_reach_here(); + } + break; + } + } + return oop_maps; +} + +#undef __ + +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } --- /dev/null 2016-08-26 13:08:04.000000000 +0300 +++ new/src/cpu/aarch32/vm/c1_globals_aarch32.hpp 2016-08-26 13:08:04.000000000 +0300 @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2016. +// Copyright 2013-2016 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +#ifndef CPU_AARCH32_VM_C1_GLOBALS_AARCH32_HPP +#define CPU_AARCH32_VM_C1_GLOBALS_AARCH32_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + +#ifndef TIERED +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, UseTLAB, true ); +define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); +define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); +define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !TIERED +define_pd_global(bool, UseTypeProfile, false); +define_pd_global(bool, RoundFPResults, true ); + +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, true ); +define_pd_global(bool, TwoOperandLIRForm, false); + +define_pd_global(intx, SafepointPollOffset, 0 ); + +#endif // CPU_AARCH32_VM_C1_GLOBALS_AARCH32_HPP
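As a rough cross-check of the thresholds above, using the usual non-profiled interpreter counter formula (the exact scaling lives in invocationCounter.cpp and also folds in the counter's non-count bits):

    // OSR back-branch limit ~= CompileThreshold * OnStackReplacePercentage / 100
    //                        = 1500 * 933 / 100 ~= 13995 taken back-edges,
    // while ordinary invocation-triggered compilation fires after roughly
    // CompileThreshold (1500) invocations.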