3315 }
3316 }
3317
3318 void MacroAssembler::load_klass(Register dst, Register src) {
3319 if (UseCompressedClassPointers) {
3320 lwz(dst, oopDesc::klass_offset_in_bytes(), src);
3321 // Attention: no null check here!
3322 decode_klass_not_null(dst, dst);
3323 } else {
3324 ld(dst, oopDesc::klass_offset_in_bytes(), src);
3325 }
3326 }
3327
3328 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) {
3329 ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
3330 ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
3331 ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
3332 }
3333
3334 // Clear Array
3335 // Kills both input registers. tmp == R0 is allowed.
3336 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
3337 // Procedure for large arrays (uses data cache block zero instruction).
3338 Label startloop, fast, fastloop, small_rest, restloop, done;
3339 const int cl_size = VM_Version::L1_data_cache_line_size(),
3340 cl_dwords = cl_size>>3,
3341 cl_dw_addr_bits = exact_log2(cl_dwords),
3342 dcbz_min = 1; // Min count of dcbz executions, needs to be >0.
3343
3344 //2:
3345 cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
3346 blt(CCR1, small_rest); // Too small.
3347 rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
3348 beq(CCR0, fast); // Already 128byte aligned.
3349
3350 subfic(tmp, tmp, cl_dwords);
3351 mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
3352 subf(cnt_dwords, tmp, cnt_dwords); // rest.
3353 li(tmp, 0);
3354 //10:
3355 bind(startloop); // Clear at the beginning to reach 128byte boundary.
3356 std(tmp, 0, base_ptr); // Clear 8byte aligned block.
3357 addi(base_ptr, base_ptr, 8);
3358 bdnz(startloop);
3359 //13:
3360 bind(fast); // Clear 128byte blocks.
3361 srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0).
3362 andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
3363 mtctr(tmp); // Load counter.
3364 //16:
3365 bind(fastloop);
3366 dcbz(base_ptr); // Clear 128byte aligned block.
3367 addi(base_ptr, base_ptr, cl_size);
3368 bdnz(fastloop);
3369 if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
3370 //20:
3371 bind(small_rest);
3372 cmpdi(CCR0, cnt_dwords, 0); // size 0?
3373 beq(CCR0, done); // rest == 0
3374 li(tmp, 0);
3375 mtctr(cnt_dwords); // Load counter.
3376 //24:
3377 bind(restloop); // Clear rest.
3378 std(tmp, 0, base_ptr); // Clear 8byte aligned block.
3379 addi(base_ptr, base_ptr, 8);
3380 bdnz(restloop);
3381 //27:
3382 bind(done);
3383 }
3384
3385 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3386
3387 #ifdef COMPILER2
3388 // Intrinsics for CompactStrings
3389
3390 // Compress char[] to byte[] by compressing 16 bytes at once.
3391 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
3392 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
3393 Label& Lfailure) {
3394
3395 const Register tmp0 = R0;
3396 assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3397 Label Lloop, Lslow;
3398
3399 // Check if cnt >= 8 (= 16 bytes)
3400 lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF00FF00FF
3401 srwi_(tmp2, cnt, 3);
|
3315 }
3316 }
3317
3318 void MacroAssembler::load_klass(Register dst, Register src) {
3319 if (UseCompressedClassPointers) {
3320 lwz(dst, oopDesc::klass_offset_in_bytes(), src);
3321 // Attention: no null check here!
3322 decode_klass_not_null(dst, dst);
3323 } else {
3324 ld(dst, oopDesc::klass_offset_in_bytes(), src);
3325 }
3326 }
3327
3328 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) {
3329 ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
3330 ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
3331 ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
3332 }
3333
3334 // Clear Array
3335 // For very short arrays. tmp == R0 is allowed.
3336 void MacroAssembler::clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp, int offset) {
3337 if (cnt_dwords > 0) { li(tmp, 0); }
3338 for (int i = 0; i < cnt_dwords; ++i) { std(tmp, offset + i * 8, base_ptr); }
3339 }
3340
3341 // Version for constant short array length. Kills base_ptr. tmp == R0 is allowed.
3342 void MacroAssembler::clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp) {
3343 if (cnt_dwords < 8) {
3344 clear_memory_unrolled(base_ptr, cnt_dwords, tmp);
3345 return;
3346 }
3347
3348 Label loop;
3349 const long loopcnt = cnt_dwords >> 1,
3350 remainder = cnt_dwords & 1;
3351
3352 li(tmp, loopcnt);
3353 mtctr(tmp);
3354 li(tmp, 0);
3355 bind(loop);
3356 std(tmp, 0, base_ptr);
3357 std(tmp, 8, base_ptr);
3358 addi(base_ptr, base_ptr, 16);
3359 bdnz(loop);
3360 if (remainder) { std(tmp, 0, base_ptr); }
3361 }
3362
3363 // Kills both input registers. tmp == R0 is allowed.
3364 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp, long const_cnt) {
3365 // Procedure for large arrays (uses data cache block zero instruction).
3366 Label startloop, fast, fastloop, small_rest, restloop, done;
3367 const int cl_size = VM_Version::L1_data_cache_line_size(),
3368 cl_dwords = cl_size >> 3,
3369 cl_dw_addr_bits = exact_log2(cl_dwords),
3370 dcbz_min = 1, // Min count of dcbz executions, needs to be >0.
3371 min_cnt = ((dcbz_min + 1) << cl_dw_addr_bits) - 1;
3372
3373 if (const_cnt >= 0) {
3374 // Constant case.
3375 if (const_cnt < min_cnt) {
3376 clear_memory_constlen(base_ptr, const_cnt, tmp);
3377 return;
3378 }
3379 load_const_optimized(cnt_dwords, const_cnt, tmp);
3380 } else {
3381 // cnt_dwords already loaded in register. Need to check size.
3382 cmpdi(CCR1, cnt_dwords, min_cnt); // Big enough? (ensure >= dcbz_min lines included).
3383 blt(CCR1, small_rest);
3384 }
3385 rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
3386 beq(CCR0, fast); // Already 128byte aligned.
3387
3388 subfic(tmp, tmp, cl_dwords);
3389 mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
3390 subf(cnt_dwords, tmp, cnt_dwords); // rest.
3391 li(tmp, 0);
3392
3393 bind(startloop); // Clear at the beginning to reach 128byte boundary.
3394 std(tmp, 0, base_ptr); // Clear 8byte aligned block.
3395 addi(base_ptr, base_ptr, 8);
3396 bdnz(startloop);
3397
3398 bind(fast); // Clear 128byte blocks.
3399 srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0).
3400 andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
3401 mtctr(tmp); // Load counter.
3402
3403 bind(fastloop);
3404 dcbz(base_ptr); // Clear 128byte aligned block.
3405 addi(base_ptr, base_ptr, cl_size);
3406 bdnz(fastloop);
3407
3408 bind(small_rest);
3409 cmpdi(CCR0, cnt_dwords, 0); // size 0?
3410 beq(CCR0, done); // rest == 0
3411 li(tmp, 0);
3412 mtctr(cnt_dwords); // Load counter.
3413
3414 bind(restloop); // Clear rest.
3415 std(tmp, 0, base_ptr); // Clear 8byte aligned block.
3416 addi(base_ptr, base_ptr, 8);
3417 bdnz(restloop);
3418
3419 bind(done);
3420 }
3421
3422 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3423
3424 #ifdef COMPILER2
3425 // Intrinsics for CompactStrings
3426
3427 // Compress char[] to byte[] by compressing 16 bytes at once.
3428 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
3429 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
3430 Label& Lfailure) {
3431
3432 const Register tmp0 = R0;
3433 assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3434 Label Lloop, Lslow;
3435
3436 // Check if cnt >= 8 (= 16 bytes)
3437 lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF00FF00FF
3438 srwi_(tmp2, cnt, 3);
|