< prev index next >

src/cpu/ppc/vm/c1_MacroAssembler_ppc.cpp

Print this page
rev 12397 : 8170991: PPC64: Bad code for initialization of short arrays
Summary: Implement special ClearArray nodes to improve initialization.
Reviewed-by:

*** 236,311 **** void C1_MacroAssembler::initialize_body(Register obj, Register tmp1, Register tmp2, int obj_size_in_bytes, int hdr_size_in_bytes) { const int index = (obj_size_in_bytes - hdr_size_in_bytes) / HeapWordSize; ! const int cl_size = VM_Version::L1_data_cache_line_size(), ! cl_dwords = cl_size>>3, ! cl_dw_addr_bits = exact_log2(cl_dwords); ! ! const Register tmp = R0, ! base_ptr = tmp1, ! cnt_dwords = tmp2; ! ! if (index <= 6) { ! // Use explicit NULL stores. ! if (index > 0) { li(tmp, 0); } ! for (int i = 0; i < index; ++i) { std(tmp, hdr_size_in_bytes + i * HeapWordSize, obj); } ! ! } else if (index < (2<<cl_dw_addr_bits)-1) { ! // simple loop ! Label loop; ! ! li(cnt_dwords, index); ! addi(base_ptr, obj, hdr_size_in_bytes); // Compute address of first element. ! li(tmp, 0); ! mtctr(cnt_dwords); // Load counter. ! bind(loop); ! std(tmp, 0, base_ptr); // Clear 8byte aligned block. ! addi(base_ptr, base_ptr, 8); ! bdnz(loop); ! } else { ! // like clear_memory_doubleword ! Label startloop, fast, fastloop, restloop, done; addi(base_ptr, obj, hdr_size_in_bytes); // Compute address of first element. ! load_const_optimized(cnt_dwords, index); ! rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line. ! beq(CCR0, fast); // Already 128byte aligned. ! ! subfic(tmp, tmp, cl_dwords); ! mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords). ! subf(cnt_dwords, tmp, cnt_dwords); // rest. ! li(tmp, 0); ! ! bind(startloop); // Clear at the beginning to reach 128byte boundary. ! std(tmp, 0, base_ptr); // Clear 8byte aligned block. ! addi(base_ptr, base_ptr, 8); ! bdnz(startloop); ! ! bind(fast); // Clear 128byte blocks. ! srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0). ! andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords. ! mtctr(tmp); // Load counter. ! ! bind(fastloop); ! dcbz(base_ptr); // Clear 128byte aligned block. ! addi(base_ptr, base_ptr, cl_size); ! bdnz(fastloop); ! ! cmpdi(CCR0, cnt_dwords, 0); // size 0? ! beq(CCR0, done); // rest == 0 ! li(tmp, 0); ! mtctr(cnt_dwords); // Load counter. ! ! bind(restloop); // Clear rest. ! std(tmp, 0, base_ptr); // Clear 8byte aligned block. ! addi(base_ptr, base_ptr, 8); ! bdnz(restloop); ! ! bind(done); } } void C1_MacroAssembler::allocate_object( Register obj, // result: pointer to object after successful allocation --- 236,253 ---- void C1_MacroAssembler::initialize_body(Register obj, Register tmp1, Register tmp2, int obj_size_in_bytes, int hdr_size_in_bytes) { const int index = (obj_size_in_bytes - hdr_size_in_bytes) / HeapWordSize; ! if (index < 10) { ! clear_memory_unrolled(obj, index, R0, hdr_size_in_bytes); } else { ! const Register base_ptr = tmp1, ! cnt_dwords = tmp2; addi(base_ptr, obj, hdr_size_in_bytes); // Compute address of first element. ! clear_memory_doubleword(base_ptr, cnt_dwords, R0, index); } } void C1_MacroAssembler::allocate_object( Register obj, // result: pointer to object after successful allocation
< prev index next >