< prev index next >

src/cpu/ppc/vm/macroAssembler_ppc.cpp

Print this page
rev 9944 : 8145336: PPC64: fix string intrinsics after CompactStrings change


3155   bind(small_rest);
3156     cmpdi(CCR0, cnt_dwords, 0);        // size 0?
3157     beq(CCR0, done);                   // rest == 0
3158     li(tmp, 0);
3159     mtctr(cnt_dwords);                 // Load counter.
3160 //24:
3161   bind(restloop);                      // Clear rest.
3162     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
3163     addi(base_ptr, base_ptr, 8);
3164     bdnz(restloop);
3165 //27:
3166   bind(done);
3167 }
3168 
3169 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3170 
3171 // Search for a single jchar in a jchar[].
3172 //
3173 // Assumes that result differs from all other registers.
3174 //
3175 // Haystack, needle are the addresses of jchar-arrays.
3176 // NeedleChar is needle[0] if it is known at compile time.
3177 // Haycnt is the length of the haystack. We assume haycnt >=1.

3178 //
3179 // Preserves haystack, haycnt, kills all other registers.
     // (The needle register is overwritten with the needle character when needle != R0;
     // R0, CCR0, CCR1 and CTR are used as well.)
3180 //
3181 // If needle == R0, we search for the constant needleChar.
     //
     // On exit, result holds the index (in characters) of the first match, or -1 if
     // the character does not occur in the haystack.
3182 void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
3183                                       Register needle, jchar needleChar,
3184                                       Register tmp1, Register tmp2) {
3185 
3186   assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
3187 
3188   Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
3189   Register needle0 = needle, // Contains needle[0]; same register as needle, so the address is replaced by the character.
3190            addr = tmp1,      // Current read position in the haystack.
3191            ch1 = tmp2,       // First character of a pair; shares tmp2 with the loop count computed below.
3192            ch2 = R0;         // Second character of a pair; R0 is reused by andi_ in the final check.
3193 
3194 //2 (variable) or 3 (const):
3195    if (needle != R0) lhz(needle0, 0, needle); // Preload needle character, needle has len==1.
3196    dcbtct(haystack, 0x00);                        // Indicate R/O access to haystack.
3197 
3198    srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR): number of 2-character iterations.
3199    mr(addr, haystack);       // Start scanning at the beginning of the haystack.
3200    beq(CCR0, L_FinalCheck);  // No complete pair (shift result was zero): go straight to the single-character check.
3201    mtctr(tmp2);              // Move to count register.
3202 //8:
3203   bind(L_InnerLoop);             // Main work horse (2x unrolled search loop).
3204    lhz(ch1, 0, addr);        // Load characters from haystack.
3205    lhz(ch2, 2, addr);
     // The comparison form is selected when this code is generated: register compare if a
     // needle register was passed, immediate compare against needleChar otherwise.
3206    (needle != R0) ? cmpw(CCR0, ch1, needle0) : cmplwi(CCR0, ch1, needleChar);
3207    (needle != R0) ? cmpw(CCR1, ch2, needle0) : cmplwi(CCR1, ch2, needleChar);
3208    beq(CCR0, L_Found1);   // Did we find the needle? Check the lower position first so the first match wins.
3209    beq(CCR1, L_Found2);
3210    addi(addr, addr, 4);      // Advance by two characters (4 bytes).
3211    bdnz(L_InnerLoop);
3212 //16:
3213   bind(L_FinalCheck);
3214    andi_(R0, haycnt, 1);     // Odd length? Then one character is still unchecked.
3215    beq(CCR0, L_NotFound);
3216    lhz(ch1, 0, addr);        // One position left at which we have to compare.
3217    (needle != R0) ? cmpw(CCR1, ch1, needle0) : cmplwi(CCR1, ch1, needleChar);
3218    beq(CCR1, L_Found3);
3219 //21:
3220   bind(L_NotFound);
3221    li(result, -1);           // Not found.
3222    b(L_End);
3223 
3224   bind(L_Found2);
3225    addi(addr, addr, 2);      // Match was the second character of the pair: step to its address.
3226 //24:
3227   bind(L_Found1);
3228   bind(L_Found3);                  // Return index ...
3229    subf(addr, haystack, addr); // relative to haystack,
3230    srdi(result, addr, 1);      // in characters.
3231   bind(L_End);
3232 }
3233 
3234 
3235 // Implementation of IndexOf for jchar arrays.
3236 //
3237 // The length of haystack and needle are not constant, i.e. passed in a register.


3382 //48:
3383   bind(L_End);
3384 }
3385 
3386 // Implementation of Compare for jchar arrays.
3387 //
3388 // Kills the registers str1, str2, cnt1, cnt2.
3389 // Kills cr0, ctr.
3390 // Assumes that result differs from the input registers.
3391 void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
3392                                     Register result_reg, Register tmp_reg) {
3393    assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg);
3394 
3395    Label Ldone, Lslow_case, Lslow_loop, Lfast_loop;
3396    Register cnt_diff = R0,
3397             limit_reg = cnt1_reg,
3398             chr1_reg = result_reg,
3399             chr2_reg = cnt2_reg,
3400             addr_diff = str2_reg;
3401 





3402    // Offset 0 should be 32 byte aligned.



3403 //-4:
3404     dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
3405     dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
3406 //-2:
3407    // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters).
3408     subf(result_reg, cnt2_reg, cnt1_reg);  // difference between cnt1/2
3409     subf_(addr_diff, str1_reg, str2_reg);  // alias?
3410     beq(CCR0, Ldone);                   // return cnt difference if both ones are identical
3411     srawi(limit_reg, result_reg, 31);      // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow)
3412     mr(cnt_diff, result_reg);
3413     andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1<cnt2 ? cnt1-cnt2 : 0
3414     add_(limit_reg, cnt2_reg, limit_reg);  // min(cnt1, cnt2)==0?
3415     beq(CCR0, Ldone);                   // return cnt difference if one has 0 length
3416 
3417     lhz(chr1_reg, 0, str1_reg);            // optional: early out if first characters mismatch
3418     lhzx(chr2_reg, str1_reg, addr_diff);   // optional: early out if first characters mismatch
3419     addi(tmp_reg, limit_reg, -1);          // min(cnt1, cnt2)-1
3420     subf_(result_reg, chr2_reg, chr1_reg); // optional: early out if first characters mismatch
3421     bne(CCR0, Ldone);                   // optional: early out if first characters mismatch
3422 


3461 
3462 // Compare char[] arrays.
     //
     // Emits code that sets result_reg to 1 if the first cnt_reg characters of str1 and
     // str2 are equal and to 0 otherwise. Characters are compared 8 bytes at a time in a
     // main loop, followed by a character-by-character loop for the remaining 0..3.
3463 //
3464 // str1_reg   USE only
3465 // str2_reg   USE only
3466 // cnt_reg    USE_DEF, due to tmp reg shortage
3467 // result_reg DEF only, might compromise USE only registers
     //
     // tmp3_reg is only referenced by the register-distinctness asserts in this function.
     // CCR0, CCR1 and CTR are used.
3468 void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
3469                                         Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
3470                                         Register tmp5_reg) {
3471 
3472   // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
     // Hence two separate asserts, neither of which lists str1_reg and str2_reg together.
3473   assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
3474   assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
3475 
3476   // Offset 0 should be 32 byte aligned.
3477   Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false;
3478   Register index_reg = tmp5_reg; // Byte offset into both arrays.
3479   Register cbc_iter  = tmp4_reg; // Iteration count of the character-by-character (cbc) loop.
3480 





3481 //-1:
3482   dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
3483   dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
3484 //1:
3485   andi(cbc_iter, cnt_reg, 4-1);            // Remaining iterations after 4 java characters per iteration loop.

3486   li(index_reg, 0); // init
3487   li(result_reg, 0); // assume false
3488   srwi_(tmp2_reg, cnt_reg, exact_log2(4)); // Div: 4 java characters per iteration (main loop). Result is tested via CCR0 below.

3489 
3490   cmpwi(CCR1, cbc_iter, 0);             // CCR1 = (cbc_iter==0); stays live across the main loop, which only uses CCR0.
3491   beq(CCR0, Linit_cbc);                 // too short: fewer than 4 characters, skip the main loop
3492     mtctr(tmp2_reg);
3493 //8:
3494     bind(Lloop);
3495       ldx(tmp1_reg, str1_reg, index_reg);  // Load 8 bytes (4 characters) from each array.
3496       ldx(tmp2_reg, str2_reg, index_reg);
3497       cmpd(CCR0, tmp1_reg, tmp2_reg);
3498       bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
3499       addi(index_reg, index_reg, 4*sizeof(jchar));
3500       bdnz(Lloop);
3501 //14:
3502   bind(Linit_cbc);
3503   beq(CCR1, Ldone_true);   // No leftover characters -> arrays are equal.
3504     mtctr(cbc_iter);
3505 //16:
3506     bind(Lcbc);
3507       lhzx(tmp1_reg, str1_reg, index_reg); // Load one character from each array.
3508       lhzx(tmp2_reg, str2_reg, index_reg);
3509       cmpw(CCR0, tmp1_reg, tmp2_reg);
3510       bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
3511       addi(index_reg, index_reg, 1*sizeof(jchar));
3512       bdnz(Lcbc);
3513     nop();                   // NOTE(review): presumably padding so the following code keeps its intended offset -- confirm.
3514   bind(Ldone_true);
3515   li(result_reg, 1);
3516 //24:
3517   bind(Ldone_false);         // result_reg still holds the 0 set above when reached by a mismatch branch.
3518 }
3519 
3520 
3521 void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
3522                                            Register tmp1_reg, Register tmp2_reg) {
3523   // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
3524   assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg);
3525   assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg);
3526   assert(sizeof(jchar) == 2, "must be");
3527   assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate");





3528 
3529   Label Ldone_false;
3530 
3531   if (cntval < 16) { // short case
3532     if (cntval != 0) li(result_reg, 0); // assume false
3533 
3534     const int num_bytes = cntval*sizeof(jchar);
3535     int index = 0;
3536     for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) {
3537       ld(tmp1_reg, index, str1_reg);
3538       ld(tmp2_reg, index, str2_reg);
3539       cmpd(CCR0, tmp1_reg, tmp2_reg);
3540       bne(CCR0, Ldone_false);
3541     }
3542     if (cntval & 2) {
3543       lwz(tmp1_reg, index, str1_reg);
3544       lwz(tmp2_reg, index, str2_reg);
3545       cmpw(CCR0, tmp1_reg, tmp2_reg);
3546       bne(CCR0, Ldone_false);
3547       index += 4;




3155   bind(small_rest);
3156     cmpdi(CCR0, cnt_dwords, 0);        // size 0?
3157     beq(CCR0, done);                   // rest == 0
3158     li(tmp, 0);
3159     mtctr(cnt_dwords);                 // Load counter.
3160 //24:
3161   bind(restloop);                      // Clear rest.
3162     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
3163     addi(base_ptr, base_ptr, 8);
3164     bdnz(restloop);
3165 //27:
3166   bind(done);
3167 }
3168 
3169 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3170 
3171 // Search for a single jchar in a jchar[].
3172 //
3173 // Assumes that result differs from all other registers.
3174 //
3175 // 'haystack' is the address of a jchar-array.
3176 // 'needle' is either a register holding the character to search for, or R0.
3177 // 'needleChar' is the character to search for if 'needle' == R0.
3178 // 'haycnt' is the length of the haystack. We assume 'haycnt' >=1.
3179 //
3180 // Preserves haystack, haycnt, needle and kills all other registers.
     // (R0, CCR0, CCR1 and CTR are used in addition to result, tmp1 and tmp2.)
3181 //
3182 // If needle == R0, we search for the constant needleChar.
     //
     // On exit, result holds the index (in characters) of the first match, or -1 if
     // the character does not occur in the haystack.
3183 void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
3184                                       Register needle, jchar needleChar,
3185                                       Register tmp1, Register tmp2) {
3186 
3187   assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
3188 
3189   Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
3190   Register addr = tmp1,      // Current read position in the haystack.

3191            ch1 = tmp2,       // First character of a pair; shares tmp2 with the loop count computed below.
3192            ch2 = R0;         // Second character of a pair; R0 is reused by andi_ in the final check.
3193 
3194 //2:

3195    dcbtct(haystack, 0x00);                        // Indicate R/O access to haystack.
3196 
3197    srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR): number of 2-character iterations.
3198    mr(addr, haystack);       // Start scanning at the beginning of the haystack.
3199    beq(CCR0, L_FinalCheck);  // No complete pair (shift result was zero): go straight to the single-character check.
3200    mtctr(tmp2);              // Move to count register.
3201 //8:
3202   bind(L_InnerLoop);             // Main work horse (2x unrolled search loop).
3203    lhz(ch1, 0, addr);        // Load characters from haystack.
3204    lhz(ch2, 2, addr);
     // The comparison form is selected when this code is generated: register compare if a
     // needle register was passed, immediate compare against needleChar otherwise.
3205    (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, needleChar);
3206    (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, needleChar);
3207    beq(CCR0, L_Found1);   // Did we find the needle? Check the lower position first so the first match wins.
3208    beq(CCR1, L_Found2);
3209    addi(addr, addr, 4);      // Advance by two characters (4 bytes).
3210    bdnz(L_InnerLoop);
3211 //16:
3212   bind(L_FinalCheck);
3213    andi_(R0, haycnt, 1);     // Odd length? Then one character is still unchecked.
3214    beq(CCR0, L_NotFound);
3215    lhz(ch1, 0, addr);        // One position left at which we have to compare.
3216    (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, needleChar);
3217    beq(CCR1, L_Found3);
3218 //21:
3219   bind(L_NotFound);
3220    li(result, -1);           // Not found.
3221    b(L_End);
3222 
3223   bind(L_Found2);
3224    addi(addr, addr, 2);      // Match was the second character of the pair: step to its address.
3225 //24:
3226   bind(L_Found1);
3227   bind(L_Found3);                  // Return index ...
3228    subf(addr, haystack, addr); // relative to haystack,
3229    srdi(result, addr, 1);      // in characters.
3230   bind(L_End);
3231 }
3232 
3233 
3234 // Implementation of IndexOf for jchar arrays.
3235 //
3236 // The length of haystack and needle are not constant, i.e. passed in a register.


3381 //48:
3382   bind(L_End);
3383 }
3384 
3385 // Implementation of Compare for jchar arrays.
3386 //
3387 // Kills the registers str1, str2, cnt1, cnt2.
3388 // Kills cr0, ctr.
3389 // Assumes that result differs from the input registers.
3390 void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
3391                                     Register result_reg, Register tmp_reg) {
3392    assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg);
3393 
3394    Label Ldone, Lslow_case, Lslow_loop, Lfast_loop;
3395    Register cnt_diff = R0,
3396             limit_reg = cnt1_reg,
3397             chr1_reg = result_reg,
3398             chr2_reg = cnt2_reg,
3399             addr_diff = str2_reg;
3400 
3401    // 'cnt_reg' contains the number of characters in the string's character array for the
3402    // pre-CompactStrings strings implementation and the number of bytes in the string's
3403    // byte array for the CompactStrings strings implementation.
3404    const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array
3405 
3406    // Offset 0 should be 32 byte aligned.
3407 //-6:
3408     srawi(cnt1_reg, cnt1_reg, HAS_COMPACT_STRING);
3409     srawi(cnt2_reg, cnt2_reg, HAS_COMPACT_STRING);
3410 //-4:
3411     dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
3412     dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
3413 //-2:
3414    // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters).
3415     subf(result_reg, cnt2_reg, cnt1_reg);  // difference between cnt1/2
3416     subf_(addr_diff, str1_reg, str2_reg);  // alias?
3417     beq(CCR0, Ldone);                   // return cnt difference if both ones are identical
3418     srawi(limit_reg, result_reg, 31);      // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow)
3419     mr(cnt_diff, result_reg);
3420     andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1<cnt2 ? cnt1-cnt2 : 0
3421     add_(limit_reg, cnt2_reg, limit_reg);  // min(cnt1, cnt2)==0?
3422     beq(CCR0, Ldone);                   // return cnt difference if one has 0 length
3423 
3424     lhz(chr1_reg, 0, str1_reg);            // optional: early out if first characters mismatch
3425     lhzx(chr2_reg, str1_reg, addr_diff);   // optional: early out if first characters mismatch
3426     addi(tmp_reg, limit_reg, -1);          // min(cnt1, cnt2)-1
3427     subf_(result_reg, chr2_reg, chr1_reg); // optional: early out if first characters mismatch
3428     bne(CCR0, Ldone);                   // optional: early out if first characters mismatch
3429 


3468 
3469 // Compare char[] arrays.
     //
     // Emits code that sets result_reg to 1 if the compared characters of str1 and str2
     // are all equal and to 0 otherwise. Characters are compared 8 bytes at a time in a
     // main loop, followed by a character-by-character loop for the remaining 0..3.
3470 //
3471 // str1_reg   USE only
3472 // str2_reg   USE only
3473 // cnt_reg    USE_DEF, due to tmp reg shortage
3474 // result_reg DEF only, might compromise USE only registers
     //
     // tmp3_reg is only referenced by the register-distinctness asserts in this function.
     // CCR0, CCR1 and CTR are used.
3475 void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
3476                                         Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
3477                                         Register tmp5_reg) {
3478 
3479   // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
     // Hence two separate asserts, neither of which lists str1_reg and str2_reg together.
3480   assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
3481   assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
3482 
3483   // Offset 0 should be 32 byte aligned.
3484   Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false;
3485   Register index_reg = tmp5_reg; // Byte offset into both arrays.
3486   Register cbc_iter  = tmp4_reg; // Iteration count of the character-by-character (cbc) loop.
3487 
3488   // 'cnt_reg' contains the number of characters in the string's character array for the
3489   // pre-CompactStrings strings implementation and the number of bytes in the string's
3490   // byte array for the CompactStrings strings implementation.
     // HAS_COMPACT_STRING is used below as an extra shift amount that converts a byte
     // count into a character count.
3491   const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array
3492 
3493 //-1:
3494   dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
3495   dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
3496 //1:
3497   // cbc_iter: remaining characters after the '4 java characters per iteration' loop.
     // The rotate-left by (32 - HAS_COMPACT_STRING) acts as a right shift by HAS_COMPACT_STRING
     // once the mask keeps only the two least significant bits (bits 30..31).
3498   rlwinm(cbc_iter, cnt_reg, 32 - HAS_COMPACT_STRING, 30, 31); // (cnt_reg % (HAS_COMPACT_STRING ? 8 : 4)) >> HAS_COMPACT_STRING
3499   li(index_reg, 0); // init
3500   li(result_reg, 0); // assume false
3501   // tmp2_reg: units of 4 java characters (i.e. 8 bytes) per iteration (main loop).
     // The result of this shift is tested via CCR0 below.
3502   srwi_(tmp2_reg, cnt_reg, exact_log2(4 << HAS_COMPACT_STRING)); // cnt_reg / (HAS_COMPACT_STRING ? 8 : 4)
3503 
3504   cmpwi(CCR1, cbc_iter, 0);             // CCR1 = (cbc_iter==0); stays live across the main loop, which only uses CCR0.
3505   beq(CCR0, Linit_cbc);                 // too short: fewer than 4 characters, skip the main loop
3506     mtctr(tmp2_reg);
3507 //8:
3508     bind(Lloop);
3509       ldx(tmp1_reg, str1_reg, index_reg);  // Load 8 bytes (4 characters) from each array.
3510       ldx(tmp2_reg, str2_reg, index_reg);
3511       cmpd(CCR0, tmp1_reg, tmp2_reg);
3512       bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
3513       addi(index_reg, index_reg, 4*sizeof(jchar));
3514       bdnz(Lloop);
3515 //14:
3516   bind(Linit_cbc);
3517   beq(CCR1, Ldone_true);   // No leftover characters -> arrays are equal.
3518     mtctr(cbc_iter);
3519 //16:
3520     bind(Lcbc);
3521       lhzx(tmp1_reg, str1_reg, index_reg); // Load one character from each array.
3522       lhzx(tmp2_reg, str2_reg, index_reg);
3523       cmpw(CCR0, tmp1_reg, tmp2_reg);
3524       bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
3525       addi(index_reg, index_reg, 1*sizeof(jchar));
3526       bdnz(Lcbc);
3527     nop();                   // NOTE(review): presumably padding so the following code keeps its intended offset -- confirm.
3528   bind(Ldone_true);
3529   li(result_reg, 1);
3530 //24:
3531   bind(Ldone_false);         // result_reg still holds the 0 set above when reached by a mismatch branch.
3532 }
3533 
3534 
3535 void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
3536                                            Register tmp1_reg, Register tmp2_reg) {
3537   // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
3538   assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg);
3539   assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg);
3540   assert(sizeof(jchar) == 2, "must be");
3541   assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate");
3542 
3543   // 'cntval' contains the number of characters in the string's character array for the
3544   // pre-CompactStrings strings implementation and the number of bytes in the string's
3545   // byte array for the CompactStrings strings implementation.
3546   cntval >>= (java_lang_String::has_coder_field() ? 1 : 0); // '1' = byte array strings, '0' = char array strings
3547 
3548   Label Ldone_false;
3549 
3550   if (cntval < 16) { // short case
3551     if (cntval != 0) li(result_reg, 0); // assume false
3552 
3553     const int num_bytes = cntval*sizeof(jchar);
3554     int index = 0;
3555     for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) {
3556       ld(tmp1_reg, index, str1_reg);
3557       ld(tmp2_reg, index, str2_reg);
3558       cmpd(CCR0, tmp1_reg, tmp2_reg);
3559       bne(CCR0, Ldone_false);
3560     }
3561     if (cntval & 2) {
3562       lwz(tmp1_reg, index, str1_reg);
3563       lwz(tmp2_reg, index, str2_reg);
3564       cmpw(CCR0, tmp1_reg, tmp2_reg);
3565       bne(CCR0, Ldone_false);
3566       index += 4;


< prev index next >