< prev index next >

src/cpu/ppc/vm/c1_MacroAssembler_ppc.cpp

Print this page
rev 12397 : 8170991: PPC64: Bad code for initialization of short arrays
Summary: Implement special ClearArray nodes to improve initialization.
Reviewed-by:

@@ -236,76 +236,18 @@
 
 void C1_MacroAssembler::initialize_body(Register obj, Register tmp1, Register tmp2,
                                         int obj_size_in_bytes, int hdr_size_in_bytes) {
   const int index = (obj_size_in_bytes - hdr_size_in_bytes) / HeapWordSize;
 
-  const int cl_size         = VM_Version::L1_data_cache_line_size(),
-            cl_dwords       = cl_size>>3,
-            cl_dw_addr_bits = exact_log2(cl_dwords);
-
-  const Register tmp = R0,
-                 base_ptr = tmp1,
-                 cnt_dwords = tmp2;
-
-  if (index <= 6) {
-    // Use explicit NULL stores.
-    if (index > 0) { li(tmp, 0); }
-    for (int i = 0; i < index; ++i) { std(tmp, hdr_size_in_bytes + i * HeapWordSize, obj); }
-
-  } else if (index < (2<<cl_dw_addr_bits)-1) {
-    // simple loop
-    Label loop;
-
-    li(cnt_dwords, index);
-    addi(base_ptr, obj, hdr_size_in_bytes); // Compute address of first element.
-    li(tmp, 0);
-    mtctr(cnt_dwords);                      // Load counter.
-  bind(loop);
-    std(tmp, 0, base_ptr);                  // Clear 8byte aligned block.
-    addi(base_ptr, base_ptr, 8);
-    bdnz(loop);
-
+  if (index < 10) {
+    clear_memory_unrolled(obj, index, R0, hdr_size_in_bytes);
   } else {
-    // like clear_memory_doubleword
-    Label startloop, fast, fastloop, restloop, done;
+    const Register base_ptr = tmp1,
+                   cnt_dwords = tmp2;
 
     addi(base_ptr, obj, hdr_size_in_bytes);           // Compute address of first element.
-    load_const_optimized(cnt_dwords, index);
-    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
-    beq(CCR0, fast);                                  // Already 128byte aligned.
-
-    subfic(tmp, tmp, cl_dwords);
-    mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
-    subf(cnt_dwords, tmp, cnt_dwords); // rest.
-    li(tmp, 0);
-
-  bind(startloop);                     // Clear at the beginning to reach 128byte boundary.
-    std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
-    addi(base_ptr, base_ptr, 8);
-    bdnz(startloop);
-
-  bind(fast);                                  // Clear 128byte blocks.
-    srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
-    andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
-    mtctr(tmp);                                // Load counter.
-
-  bind(fastloop);
-    dcbz(base_ptr);                    // Clear 128byte aligned block.
-    addi(base_ptr, base_ptr, cl_size);
-    bdnz(fastloop);
-
-    cmpdi(CCR0, cnt_dwords, 0);        // size 0?
-    beq(CCR0, done);                   // rest == 0
-    li(tmp, 0);
-    mtctr(cnt_dwords);                 // Load counter.
-
-  bind(restloop);                      // Clear rest.
-    std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
-    addi(base_ptr, base_ptr, 8);
-    bdnz(restloop);
-
-  bind(done);
+    clear_memory_doubleword(base_ptr, cnt_dwords, R0, index);
   }
 }
 
 void C1_MacroAssembler::allocate_object(
   Register obj,                        // result: pointer to object after successful allocation
< prev index next >