< prev index next >

src/cpu/ppc/vm/macroAssembler_ppc.cpp

Print this page
rev 12397 : 8170991: PPC64: Bad code for initialization of short arrays
Summary: Implement special ClearArray nodes to improve initialization.
Reviewed-by:


3315   }
3316 }
3317 
3318 void MacroAssembler::load_klass(Register dst, Register src) {
     // Emits code that reads the field at oopDesc::klass_offset_in_bytes() relative to src
     // and leaves the resulting klass value in dst.
3319   if (UseCompressedClassPointers) {
     // Compressed form: fetch the field with lwz, then decode it in place (dst -> dst).
3320     lwz(dst, oopDesc::klass_offset_in_bytes(), src);
3321     // Attention: no null check here!
3322     decode_klass_not_null(dst, dst);
3323   } else {
     // Uncompressed form: a single ld of the field, no decoding step.
3324     ld(dst, oopDesc::klass_offset_in_bytes(), src);
3325   }
3326 }
3327 
3328 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) {
     // Emits three dependent loads; mirror doubles as the intermediate pointer for each step.
     // const_method is used only as the base address of the first load.
3329   ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method); // const_method -> constants
3330   ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);     // constants    -> pool holder
3331   ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);           // pool holder  -> java mirror
3332 }
3333 
3334 // Clear Array




























3335 // Emits code that stores zero to cnt_dwords doublewords (8 bytes each) starting at base_ptr.
     // Kills both input registers. tmp == R0 is allowed.
     // The emitted code also uses CTR as loop counter and writes CCR0 and CCR1.
     // NOTE(review): all stores advance in 8-byte steps and the cache-line offset is taken in
     // dwords, so base_ptr is presumably 8-byte aligned -- confirm with callers.
3336 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
3337   // Procedure for large arrays (uses data cache block zero instruction).
     //   Three phases: single stores up to the next cache-line boundary (startloop), one dcbz
     //   per whole cache line (fastloop), single stores for the leftover dwords (restloop).
     //   Counts below the threshold computed below go straight to the last phase.
     //   Comments saying "128byte" describe cl_size == 128; the emitted code uses cl_size.
3338     Label startloop, fast, fastloop, small_rest, restloop, done;
3339     const int cl_size         = VM_Version::L1_data_cache_line_size(),
3340               cl_dwords       = cl_size>>3,
3341               cl_dw_addr_bits = exact_log2(cl_dwords),
3342               dcbz_min        = 1;                     // Min count of dcbz executions, needs to be >0.

3343 
3344 //2:
     // Threshold is (dcbz_min+1)*cl_dwords-1: the alignment loop consumes at most cl_dwords-1
     // dwords, so at least dcbz_min complete cache lines remain for the dcbz loop.
3345     cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
3346     blt(CCR1, small_rest);                                      // Too small.









3347     rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits);           // Extract dword offset within first cache line.
3348     beq(CCR0, fast);                                            // Already 128byte aligned.
3349 
     // tmp = number of dwords from base_ptr up to the next cache-line boundary.
3350     subfic(tmp, tmp, cl_dwords);
3351     mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
3352     subf(cnt_dwords, tmp, cnt_dwords); // rest.
3353     li(tmp, 0);
3354 //10:
3355   bind(startloop);                     // Clear at the beginning to reach 128byte boundary.
3356     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
3357     addi(base_ptr, base_ptr, 8);
3358     bdnz(startloop);
3359 //13:
3360   bind(fast);                                  // Clear 128byte blocks.
3361     srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
3362     andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
3363     mtctr(tmp);                                // Load counter.
3364 //16:
3365   bind(fastloop);
3366     dcbz(base_ptr);                    // Clear 128byte aligned block.
3367     addi(base_ptr, base_ptr, cl_size);
3368     bdnz(fastloop);
     // Emits either an endgroup or a nop after the dcbz loop, selected by InsertEndGroupPPC64.
     // NOTE(review): the reason is not visible here; presumably instruction grouping -- confirm.
3369     if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
3370 //20:
     // Reached either by the branch from the size check (cnt_dwords is the whole count and tmp
     // has not been zeroed) or by falling out of the dcbz loop (cnt_dwords is the remainder and
     // tmp still holds the old loop count). Hence tmp is set to zero again below.
3371   bind(small_rest);
3372     cmpdi(CCR0, cnt_dwords, 0);        // size 0?
3373     beq(CCR0, done);                   // rest == 0
3374     li(tmp, 0);
3375     mtctr(cnt_dwords);                 // Load counter.
3376 //24:
3377   bind(restloop);                      // Clear rest.
3378     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
3379     addi(base_ptr, base_ptr, 8);
3380     bdnz(restloop);
3381 //27:
3382   bind(done);
3383 }
3384 
3385 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3386 
3387 #ifdef COMPILER2
3388 // Intrinsics for CompactStrings
3389 
3390 // Compress char[] to byte[] by compressing 16 bytes at once.
3391 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
3392                                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
3393                                         Label& Lfailure) {
3394 
3395   const Register tmp0 = R0;
3396   assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3397   Label Lloop, Lslow;
3398 
3399   // Check if cnt >= 8 (= 16 bytes)
3400   lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF00FF00FF
3401   srwi_(tmp2, cnt, 3);




3315   }
3316 }
3317 
3318 void MacroAssembler::load_klass(Register dst, Register src) {
     // Emits code that reads the field at oopDesc::klass_offset_in_bytes() relative to src
     // and leaves the resulting klass value in dst.
3319   if (UseCompressedClassPointers) {
     // Compressed form: fetch the field with lwz, then decode it in place (dst -> dst).
3320     lwz(dst, oopDesc::klass_offset_in_bytes(), src);
3321     // Attention: no null check here!
3322     decode_klass_not_null(dst, dst);
3323   } else {
     // Uncompressed form: a single ld of the field, no decoding step.
3324     ld(dst, oopDesc::klass_offset_in_bytes(), src);
3325   }
3326 }
3327 
3328 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) {
     // Emits three dependent loads; mirror doubles as the intermediate pointer for each step.
     // const_method is used only as the base address of the first load.
3329   ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method); // const_method -> constants
3330   ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);     // constants    -> pool holder
3331   ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);           // pool holder  -> java mirror
3332 }
3333 
3334 // Clear Array
3335 // For very short arrays. tmp == R0 is allowed.
     // Emits straight-line code without a loop: one li(tmp, 0) followed by one std per
     // doubleword, covering cnt_dwords consecutive doublewords at base_ptr + offset.
     // cnt_dwords is a plain int known while generating code; nothing is emitted if it is <= 0.
     // base_ptr is only used as the base address of the stores and is not modified.
3336 void MacroAssembler::clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp, int offset) {
3337   if (cnt_dwords > 0) { li(tmp, 0); }                                           // Zero source for the stores.
3338   for (int i = 0; i < cnt_dwords; ++i) { std(tmp, offset + i * 8, base_ptr); }  // Displacement advances by 8 bytes per dword.
3339 }
3340 
3341 // Version for constant short array length. Kills base_ptr. tmp == R0 is allowed.
     // Emits code that stores zero to cnt_dwords doublewords starting at base_ptr, where
     // cnt_dwords is an int known while generating code.
     // Fewer than 8 dwords: delegates to clear_memory_unrolled (no loop emitted).
     // Otherwise: a CTR-counted loop clearing two dwords per iteration, plus one extra
     // store when cnt_dwords is odd. The loop advances base_ptr by 16 bytes per iteration.
3342 void MacroAssembler::clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp) {
3343   if (cnt_dwords < 8) {
3344     clear_memory_unrolled(base_ptr, cnt_dwords, tmp);
3345     return;
3346   }
3347 
3348   Label loop;
3349   const long loopcnt   = cnt_dwords >> 1,  // Iterations, two dwords each.
3350              remainder = cnt_dwords & 1;   // 1 if a single trailing dword is left over.
3351 
     // NOTE(review): loopcnt is passed to li as an immediate; presumably callers keep
     // cnt_dwords small enough for that -- confirm.
3352   li(tmp, loopcnt);
3353   mtctr(tmp);
3354   li(tmp, 0);                          // tmp is reused as the zero source for the stores.
3355   bind(loop);
3356     std(tmp, 0, base_ptr);
3357     std(tmp, 8, base_ptr);
3358     addi(base_ptr, base_ptr, 16);
3359     bdnz(loop);
     // base_ptr now points just past the pairs cleared by the loop.
3360   if (remainder) { std(tmp, 0, base_ptr); }
3361 }
3362 
3363 // Emits code that stores zero to a run of doublewords (8 bytes each) starting at base_ptr.
     // Kills both input registers. tmp == R0 is allowed.
     // const_cnt >= 0: the dword count is that constant; the incoming value of cnt_dwords is
     //                 not read (the register is overwritten if the large-array path is taken).
     // const_cnt <  0: the dword count is taken from register cnt_dwords at run time.
     // The emitted code also uses CTR as loop counter and writes CCR0 (and CCR1 in the
     // register-count case).
     // NOTE(review): all stores advance in 8-byte steps and the cache-line offset is taken in
     // dwords, so base_ptr is presumably 8-byte aligned -- confirm with callers.
3364 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp, long const_cnt) {
3365   // Procedure for large arrays (uses data cache block zero instruction).
     //   Three phases: single stores up to the next cache-line boundary (startloop), one dcbz
     //   per whole cache line (fastloop), single stores for the leftover dwords (restloop).
     //   Comments saying "128byte" describe cl_size == 128; the emitted code uses cl_size.
3366     Label startloop, fast, fastloop, small_rest, restloop, done;
3367     const int cl_size         = VM_Version::L1_data_cache_line_size(),
3368               cl_dwords       = cl_size >> 3,
3369               cl_dw_addr_bits = exact_log2(cl_dwords),
3370               dcbz_min        = 1,  // Min count of dcbz executions, needs to be >0.
     // min_cnt is (dcbz_min+1)*cl_dwords-1: the alignment loop consumes at most cl_dwords-1
     // dwords, so at least dcbz_min complete cache lines remain for the dcbz loop.
3371               min_cnt         = ((dcbz_min + 1) << cl_dw_addr_bits) - 1;
3372 
3373   if (const_cnt >= 0) {
3374     // Constant case.
     // Below the dcbz threshold the whole job is handed to clear_memory_constlen and
     // none of the code below is emitted.
3375     if (const_cnt < min_cnt) {
3376       clear_memory_constlen(base_ptr, const_cnt, tmp);
3377       return;
3378     }
     // Large constant: materialize it in cnt_dwords so the register-based code below applies.
     // No size check is emitted because the comparison was already done at generation time.
3379     load_const_optimized(cnt_dwords, const_cnt, tmp);
3380   } else {
3381     // cnt_dwords already loaded in register. Need to check size.
3382     cmpdi(CCR1, cnt_dwords, min_cnt); // Big enough? (ensure >= dcbz_min lines included).
3383     blt(CCR1, small_rest);
3384   }
3385     rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits);           // Extract dword offset within first cache line.
3386     beq(CCR0, fast);                                            // Already 128byte aligned.
3387 
     // tmp = number of dwords from base_ptr up to the next cache-line boundary.
3388     subfic(tmp, tmp, cl_dwords);
3389     mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
3390     subf(cnt_dwords, tmp, cnt_dwords); // rest.
3391     li(tmp, 0);
3392 
3393   bind(startloop);                     // Clear at the beginning to reach 128byte boundary.
3394     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
3395     addi(base_ptr, base_ptr, 8);
3396     bdnz(startloop);
3397 
3398   bind(fast);                                  // Clear 128byte blocks.
3399     srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
3400     andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
3401     mtctr(tmp);                                // Load counter.
3402 
3403   bind(fastloop);
3404     dcbz(base_ptr);                    // Clear 128byte aligned block.
3405     addi(base_ptr, base_ptr, cl_size);
3406     bdnz(fastloop);
3407 

     // Reached either by the branch from the run-time size check (cnt_dwords is the whole
     // count and tmp has not been zeroed) or by falling out of the dcbz loop (cnt_dwords is
     // the remainder and tmp still holds the old loop count). Hence tmp is zeroed again below.
3408   bind(small_rest);
3409     cmpdi(CCR0, cnt_dwords, 0);        // size 0?
3410     beq(CCR0, done);                   // rest == 0
3411     li(tmp, 0);
3412     mtctr(cnt_dwords);                 // Load counter.
3413 
3414   bind(restloop);                      // Clear rest.
3415     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
3416     addi(base_ptr, base_ptr, 8);
3417     bdnz(restloop);
3418 
3419   bind(done);
3420 }
3421 
3422 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3423 
3424 #ifdef COMPILER2
3425 // Intrinsics for CompactStrings
3426 
3427 // Compress char[] to byte[] by compressing 16 bytes at once.
3428 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
3429                                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
3430                                         Label& Lfailure) {
3431 
3432   const Register tmp0 = R0;
3433   assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3434   Label Lloop, Lslow;
3435 
3436   // Check if cnt >= 8 (= 16 bytes)
3437   lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF00FF00FF
3438   srwi_(tmp2, cnt, 3);


< prev index next >