< prev index next >

src/cpu/ppc/vm/macroAssembler_ppc.cpp

Print this page
rev 10235 : 8149655: PPC64: Implement CompactString intrinsics
Reviewed-by: goetz


  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/cardTableModRefBS.hpp"
  30 #include "gc/shared/collectedHeap.inline.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "nativeInst_ppc.hpp"
  34 #include "prims/methodHandles.hpp"
  35 #include "runtime/biasedLocking.hpp"
  36 #include "runtime/icache.hpp"
  37 #include "runtime/interfaceSupport.hpp"
  38 #include "runtime/objectMonitor.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "utilities/macros.hpp"
  43 #if INCLUDE_ALL_GCS
  44 #include "gc/g1/g1CollectedHeap.inline.hpp"
  45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  46 #include "gc/g1/heapRegion.hpp"
  47 #endif // INCLUDE_ALL_GCS



  48 
  49 #ifdef PRODUCT
     // BLOCK_COMMENT(str) expands to nothing in PRODUCT builds and to block_comment(str) otherwise.
  50 #define BLOCK_COMMENT(str) // nothing
  51 #else
  52 #define BLOCK_COMMENT(str) block_comment(str)
  53 #endif
     // BIND(label) binds the label and then issues BLOCK_COMMENT with the stringized label name
     // followed by ":". Note: the macro expands to two statements, so it must not be used as the
     // unbraced body of an if/else/loop.
  54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  55 
  56 #ifdef ASSERT
  57 // On RISC, there's no benefit to verifying instruction boundaries.
     // Only compiled in ASSERT builds; always answers false.
  58 bool AbstractAssembler::pd_check_instruction_mark() { return false; }
  59 #endif
  60 
  61 void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
  62   assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
  63   if (Assembler::is_simm(si31, 16)) {
  64     ld(d, si31, a);
  65     if (emit_filler_nop) nop();
  66   } else {
  67     const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);


3151     addi(base_ptr, base_ptr, cl_size);
3152     bdnz(fastloop);
3153     if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
3154 //20:
3155   bind(small_rest);
3156     cmpdi(CCR0, cnt_dwords, 0);        // size 0?
3157     beq(CCR0, done);                   // rest == 0
3158     li(tmp, 0);
3159     mtctr(cnt_dwords);                 // Load counter.
3160 //24:
3161   bind(restloop);                      // Clear rest.
3162     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
3163     addi(base_ptr, base_ptr, 8);
3164     bdnz(restloop);
3165 //27:
3166   bind(done);
3167 }
3168 
3169 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3170 



































































































































































































































































































































































































































































































































































3171 // Search for a single jchar in a jchar[].
3172 //
3173 // Assumes that result differs from all other registers.
3174 //
3175 // 'haystack' is the address of a jchar-array.
3176 // 'needle' is either the character to search for or R0.
3177 // 'needleChar' is the character to search for if 'needle' == R0.
3178 // 'haycnt' is the length of the haystack. We assume 'haycnt' >=1.
3179 //
3180 // Preserves haystack, haycnt, needle and kills all other registers.
3181 //
3182 // If needle == R0, we search for the constant needleChar.
3183 void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
3184                                       Register needle, jchar needleChar,
3185                                       Register tmp1, Register tmp2) {
3186 
3187   assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
3188 
3189   Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
3190   Register addr = tmp1,


3596     //14:
3597     if (cntval & 2) {
3598       lwzx(R0, str1_reg, index_reg);
3599       lwzx(tmp2_reg, str2_reg, index_reg);
3600       cmpw(CCR0, R0, tmp2_reg);
3601       bne(CCR0, Ldone_false);
3602       if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
3603     }
3604     if (cntval & 1) {
3605       lhzx(R0, str1_reg, index_reg);
3606       lhzx(tmp2_reg, str2_reg, index_reg);
3607       cmpw(CCR0, R0, tmp2_reg);
3608       bne(CCR0, Ldone_false);
3609     }
3610     // fallthru: true
3611   }
3612   li(result_reg, 1);
3613   bind(Ldone_false);
3614 }
3615 


3616 // Helpers for Intrinsic Emitters
3617 //
3618 // Reverse the byte order of a 32bit value in a register
3619 //   src: 0x44556677
3620 //   dst: 0x77665544
3621 // Three steps to obtain the result:
3622 //  1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word
3623 //     into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared.
3624 //     This value initializes dst.
3625 //  2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost
3626 //     byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go.
3627 //     This value is mask inserted into dst with a [0..23] mask of 1s.
3628 //  3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position.
3629 //     This value is mask inserted into dst with a [8..15] mask of 1s.
3630 void MacroAssembler::load_reverse_32(Register dst, Register src) {
3631   assert_different_registers(dst, src);
3632 
3633   rldicl(dst, src, (4+1)*8, 56);       // Rotate byte 4 into position 7 (rightmost), clear all to the left.
3634   rlwimi(dst, src,     3*8,  0, 23);   // Insert byte 5 into position 6, 7 into 4, leave pos 7 alone.
3635   rlwimi(dst, src,     1*8,  8, 15);   // Insert byte 6 into position 5, leave the rest alone.




  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/cardTableModRefBS.hpp"
  30 #include "gc/shared/collectedHeap.inline.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "nativeInst_ppc.hpp"
  34 #include "prims/methodHandles.hpp"
  35 #include "runtime/biasedLocking.hpp"
  36 #include "runtime/icache.hpp"
  37 #include "runtime/interfaceSupport.hpp"
  38 #include "runtime/objectMonitor.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "utilities/macros.hpp"
  43 #if INCLUDE_ALL_GCS
  44 #include "gc/g1/g1CollectedHeap.inline.hpp"
  45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  46 #include "gc/g1/heapRegion.hpp"
  47 #endif // INCLUDE_ALL_GCS
  48 #ifdef COMPILER2
  49 #include "opto/intrinsicnode.hpp"
  50 #endif
  51 
  52 #ifdef PRODUCT
     // BLOCK_COMMENT(str) expands to nothing in PRODUCT builds and to block_comment(str) otherwise.
  53 #define BLOCK_COMMENT(str) // nothing
  54 #else
  55 #define BLOCK_COMMENT(str) block_comment(str)
  56 #endif
     // BIND(label) binds the label and then issues BLOCK_COMMENT with the stringized label name
     // followed by ":". Note: the macro expands to two statements, so it must not be used as the
     // unbraced body of an if/else/loop.
  57 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  58 
  59 #ifdef ASSERT
  60 // On RISC, there's no benefit to verifying instruction boundaries.
     // Only compiled in ASSERT builds; always answers false.
  61 bool AbstractAssembler::pd_check_instruction_mark() { return false; }
  62 #endif
  63 
  64 void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
  65   assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
  66   if (Assembler::is_simm(si31, 16)) {
  67     ld(d, si31, a);
  68     if (emit_filler_nop) nop();
  69   } else {
  70     const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);


3154     addi(base_ptr, base_ptr, cl_size);
3155     bdnz(fastloop);
3156     if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
3157 //20:
3158   bind(small_rest);
3159     cmpdi(CCR0, cnt_dwords, 0);        // size 0?
3160     beq(CCR0, done);                   // rest == 0
3161     li(tmp, 0);
3162     mtctr(cnt_dwords);                 // Load counter.
3163 //24:
3164   bind(restloop);                      // Clear rest.
3165     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
3166     addi(base_ptr, base_ptr, 8);
3167     bdnz(restloop);
3168 //27:
3169   bind(done);
3170 }
3171 
3172 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3173 
3174 #ifdef COMPILER2
3175 // Intrinsics for CompactStrings
3176 
3177 // Compress char[] to byte[] by compressing 16 bytes at once.
     //
     // Emits a loop that runs cnt / 8 times. Each iteration loads 8 chars (16 bytes) from 'src',
     // branches to 'Lfailure' if any of them has a bit set outside the 0x00FF mask (not latin1),
     // and otherwise stores the 8 low bytes to 'dst'. 'src' and 'dst' are advanced past the data
     // that was successfully processed; on the failure branch they still point to the start of the
     // offending group of 8 chars.
     //
     // 'cnt' is only read. The remaining cnt % 8 chars are not handled here; control falls through
     // at the end (also immediately if cnt < 8) so that following code can process them.
     //
     // Kills tmp1..tmp5, R0, CTR and CCR0.
3178 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
3179                                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
3180                                         Label& Lfailure) {
3181 
3182   const Register tmp0 = R0;
3183   assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3184   Label Lloop, Lslow;
3185 
3186   // Check if cnt >= 8 (= 16 bytes)
3187   lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF00FF00FF (completed by ori/rldimi below)
3188   srwi_(tmp2, cnt, 3);            // tmp2 = number of 8-char groups, sets CCR0.
3189   beq(CCR0, Lslow);
3190   ori(tmp1, tmp1, 0xFF);
3191   rldimi(tmp1, tmp1, 32, 0);
3192   mtctr(tmp2);
3193 
3194   // 2x unrolled loop
3195   bind(Lloop);
3196   ld(tmp2, 0, src);               // _0_1_2_3 (Big Endian)
3197   ld(tmp4, 8, src);               // _4_5_6_7
3198 
3199   orr(tmp0, tmp2, tmp4);          // Combine all 8 chars for a single latin1 check.
3200   rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
3201   rldimi(tmp2, tmp2, 2*8, 2*8);   // _0_2_3_3
3202   rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
3203   rldimi(tmp4, tmp4, 2*8, 2*8);   // _4_6_7_7
3204 
3205   andc_(tmp0, tmp0, tmp1);        // Any bit set outside the 0x00FF mask?
3206   bne(CCR0, Lfailure);            // Not latin1.
3207   addi(src, src, 16);
3208 
3209   rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
3210   srdi(tmp2, tmp2, 3*8);          // ____0_2_
3211   rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
3212   srdi(tmp4, tmp4, 3*8);          // ____4_6_
3213 
3214   orr(tmp2, tmp2, tmp3);          // ____0123
3215   orr(tmp4, tmp4, tmp5);          // ____4567
3216 
3217   stw(tmp2, 0, dst);
3218   stw(tmp4, 4, dst);
3219   addi(dst, dst, 8);
3220   bdnz(Lloop);
3221 
3222   bind(Lslow);                    // Fallback to slow version
3223 }
3224 
3225 // Compress char[] to byte[]. cnt must be positive int.
     //
     // Emits a loop that handles one char per iteration: it loads a 16 bit char from 'src',
     // branches to 'Lfailure' if the char is above 0xff (unsigned compare), and otherwise stores
     // its low byte to 'dst'. 'src' and 'dst' are advanced; on the failure branch they still point
     // to the offending char.
     //
     // The loop count is taken from 'cnt' via CTR and the loop body comes before the CTR test, so
     // the body is always executed at least once; hence the requirement that cnt is positive.
     //
     // Kills tmp, CTR and CCR0. 'cnt' is only read.
3226 void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
3227   Label Lloop;
3228   mtctr(cnt);
3229 
3230   bind(Lloop);
3231   lhz(tmp, 0, src);
3232   cmplwi(CCR0, tmp, 0xff);
3233   bgt(CCR0, Lfailure);            // Not latin1.
3234   addi(src, src, 2);
3235   stb(tmp, 0, dst);
3236   addi(dst, dst, 1);
3237   bdnz(Lloop);
3238 }
3239 
3240 // Inflate byte[] to char[] by inflating 16 bytes at once.
     //
     // Emits a loop that runs cnt / 8 times. Each iteration loads 8 bytes from 'src' (two 4 byte
     // loads) and stores them, interleaved with zero bytes, as 8 chars (16 bytes) to 'dst'.
     // 'src' and 'dst' are advanced past the processed data.
     //
     // 'cnt' is only read. The remaining cnt % 8 bytes are not handled here; control falls through
     // at the end (also immediately if cnt < 8) so that following code can process them.
     //
     // Kills tmp1..tmp5, R0, CTR and CCR0.
3241 void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
3242                                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
3243   const Register tmp0 = R0;
3244   assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3245   Label Lloop, Lslow;
3246 
3247   // Check if cnt >= 8
3248   srwi_(tmp2, cnt, 3);            // tmp2 = number of 8-byte groups, sets CCR0.
3249   beq(CCR0, Lslow);
3250   lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF
3251   ori(tmp1, tmp1, 0xFF);
3252   mtctr(tmp2);
3253 
3254   // 2x unrolled loop
3255   bind(Lloop);
3256   lwz(tmp2, 0, src);              // ____0123 (Big Endian)
3257   lwz(tmp4, 4, src);              // ____4567
3258   addi(src, src, 8);
3259 
3260   rldicl(tmp3, tmp2, 7*8, 64-8);  // _______2
3261   rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
3262   rldicl(tmp5, tmp4, 7*8, 64-8);  // _______6
3263   rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
3264 
3265   andc(tmp0, tmp2, tmp1);         // ____0_1_
3266   rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
3267   andc(tmp3, tmp4, tmp1);         // ____4_5_
3268   rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
3269 
3270   rldimi(tmp2, tmp0, 3*8, 0*8);   // _0_1_2_3
3271   rldimi(tmp4, tmp3, 3*8, 0*8);   // _4_5_6_7
3272 
3273   std(tmp2, 0, dst);
3274   std(tmp4, 8, dst);
3275   addi(dst, dst, 16);
3276   bdnz(Lloop);
3277 
3278   bind(Lslow);                    // Fallback to slow version
3279 }
3280 
3281 // Inflate byte[] to char[]. cnt must be positive int.
     //
     // Emits a loop that handles one byte per iteration: it loads a byte from 'src' (zero
     // extended) and stores it as a 16 bit char to 'dst'. 'src' and 'dst' are advanced.
     //
     // The loop count is taken from 'cnt' via CTR and the loop body comes before the CTR test, so
     // the body is always executed at least once; hence the requirement that cnt is positive.
     //
     // Kills tmp and CTR. 'cnt' is only read.
3282 void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
3283   Label Lloop;
3284   mtctr(cnt);
3285 
3286   bind(Lloop);
3287   lbz(tmp, 0, src);
3288   addi(src, src, 1);
3289   sth(tmp, 0, dst);
3290   addi(dst, dst, 2);
3291   bdnz(Lloop);
3292 }
3293 
     // Emits code that compares two strings and leaves a compareTo-style value in 'result'.
     //
     // 'str1', 'str2' are the addresses of the first characters. Both are advanced by the emitted code.
     // 'cnt1', 'cnt2' are the lengths. 'cnt1' is halved for UU and 'cnt2' is halved for every encoding
     //                except LL (presumably the counts arrive in bytes - confirm against the caller).
     //                Both registers are killed.
     // 'tmp1'         holds the length difference (cnt1 - cnt2), which is used as tie-breaker.
     // 'result'       receives the difference of the first pair of mismatching characters, or the
     //                length difference if the strings agree up to the shorter length. It is also
     //                used as scratch register while comparing. The value is negated for UL.
     // 'ae'           is the argument encoding: StrIntrinsicNode::LL, UU, LU or UL.
     //
     // Also kills R0, CTR and CCR0.
     //
     // NOTE(review): the operand interchange for UL that is mentioned below is not performed by this
     // function; presumably the caller already passes the operands swapped - confirm.
3294 void MacroAssembler::string_compare(Register str1, Register str2,
3295                                     Register cnt1, Register cnt2,
3296                                     Register tmp1, Register result, int ae) {
3297   const Register tmp0 = R0,
3298                  diff = tmp1;
3299 
3300   assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
3301   Label Ldone, Lslow, Lloop, Lreturn_diff;
3302 
3303   // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
3304   // we interchange str1 and str2 in the UL case and negate the result.
3305   // Like this, str1 is always latin1 encoded, except for the UU case.
3306   // In addition, we need 0 (or sign which is 0) extend.
3307 
3308   if (ae == StrIntrinsicNode::UU) {
3309     srwi(cnt1, cnt1, 1);
3310   } else {
3311     clrldi(cnt1, cnt1, 32);
3312   }
3313 
3314   if (ae != StrIntrinsicNode::LL) {
3315     srwi(cnt2, cnt2, 1);
3316   } else {
3317     clrldi(cnt2, cnt2, 32);
3318   }
3319 
3320   // See if the lengths are different, and calculate min in cnt1.
3321   // Save diff in case we need it for a tie-breaker.
3322   subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
3323   // if (diff > 0) { cnt1 = cnt2; }
3324   if (VM_Version::has_isel()) {
3325     isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
3326   } else {
       // Branch variant also copies for diff == 0, which is harmless because cnt1 == cnt2 then.
3327     Label Lskip;
3328     blt(CCR0, Lskip);
3329     mr(cnt1, cnt2);
3330     bind(Lskip);
3331   }
3332 
3333   // Rename registers
3334   Register chr1 = result;
3335   Register chr2 = tmp0;
3336 
3337   // Compare multiple characters in fast loop (only implemented for same encoding).
     // stride1/stride2 are assembly-time values: 8 bytes per iteration for the fast loop here,
     // they get reassigned to the per-character size before the slow loop is emitted.
3338   int stride1 = 8, stride2 = 8;
3339   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3340     int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
3341     Label Lfastloop, Lskipfast;
3342 
3343     srwi_(tmp0, cnt1, log2_chars_per_iter);
3344     beq(CCR0, Lskipfast);
3345     rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
3346     li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
3347     mtctr(tmp0);
3348 
3349     bind(Lfastloop);
3350     ld(chr1, 0, str1);
3351     ld(chr2, 0, str2);
3352     cmpd(CCR0, chr1, chr2);
3353     bne(CCR0, Lslow);               // Mismatch within these 8 bytes: locate it character by character.
3354     addi(str1, str1, stride1);
3355     addi(str2, str2, stride2);
3356     bdnz(Lfastloop);
3357     mr(cnt1, cnt2); // Remaining characters.
3358     bind(Lskipfast);
3359   }
3360 
3361   // Loop which searches the first difference character by character.
3362   cmpwi(CCR0, cnt1, 0);
3363   beq(CCR0, Lreturn_diff);
3364   bind(Lslow);
3365   mtctr(cnt1);
3366 
3367   switch (ae) {
3368     case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
3369     case StrIntrinsicNode::UL: // fallthru (see comment above)
3370     case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
3371     case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
3372     default: ShouldNotReachHere(); break;
3373   }
3374 
3375   bind(Lloop);
3376   if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
3377   if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
3378   subf_(result, chr2, chr1); // result = chr1 - chr2
3379   bne(CCR0, Ldone);
3380   addi(str1, str1, stride1);
3381   addi(str2, str2, stride2);
3382   bdnz(Lloop);
3383 
3384   // If strings are equal up to min length, return the length difference.
3385   bind(Lreturn_diff);
3386   mr(result, diff);
3387 
3388   // Otherwise, return the difference between the first mismatched chars.
3389   bind(Ldone);
3390   if (ae == StrIntrinsicNode::UL) {
3391     neg(result, result); // Negate result (see note above).
3392   }
3393 }
3394 
     // Emits code that compares two sequences of bytes or chars for equality.
     //
     // 'is_array_equ' true:  'ary1'/'ary2' are array references. Identical references compare equal,
     //                       a NULL reference compares unequal, and the lengths are loaded from the
     //                       array headers (into 'limit' and R0) and must match before the elements
     //                       are compared.
     //                false: 'ary1'/'ary2' are used directly as element addresses and 'limit' is an
     //                       input. For !is_byte the limit is shifted right by one additional bit
     //                       (presumably it is a byte count - confirm against the caller).
     // 'is_byte'             selects 1 byte (true) or 2 byte (false) elements.
     // 'result'              is set to 1 if the sequences are equal, 0 otherwise.
     //
     // Kills limit, tmp1, R0, CTR and CCR0 (and CCR1 if is_array_equ). 'ary1' and 'ary2' are modified.
3395 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
3396                                   Register limit, Register tmp1, Register result, bool is_byte) {
3397   const Register tmp0 = R0;
3398   assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
3399   Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
3400   bool limit_needs_shift = false;
3401 
3402   if (is_array_equ) {
3403     const int length_offset = arrayOopDesc::length_offset_in_bytes();
3404     const int base_offset   = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
3405 
3406     // Return true if the same array.
3407     cmpd(CCR0, ary1, ary2);
3408     beq(CCR0, Lskiploop);
3409 
3410     // Return false if one of them is NULL.
3411     cmpdi(CCR0, ary1, 0);
3412     cmpdi(CCR1, ary2, 0);
3413     li(result, 0);
3414     cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
3415     beq(CCR0, Ldone);
3416 
3417     // Load the lengths of arrays.
3418     lwz(limit, length_offset, ary1);
3419     lwz(tmp0, length_offset, ary2);
3420 
3421     // Return false if the two arrays are not equal length.
3422     cmpw(CCR0, limit, tmp0);
3423     bne(CCR0, Ldone);
3424 
3425     // Load array addresses.
3426     addi(ary1, ary1, base_offset);
3427     addi(ary2, ary2, base_offset);
3428   } else {
3429     limit_needs_shift = !is_byte;
3430     li(result, 0); // Assume not equal.
3431   }
3432 
3433   // Rename registers
3434   Register chr1 = tmp0;
3435   Register chr2 = tmp1;
3436 
3437   // Compare 8 bytes per iteration in fast loop.
3438   const int log2_chars_per_iter = is_byte ? 3 : 2;
3439 
3440   srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
3441   beq(CCR0, Lskipfast);
3442   mtctr(tmp0);
3443 
3444   bind(Lfastloop);
3445   ld(chr1, 0, ary1);
3446   ld(chr2, 0, ary2);
3447   addi(ary1, ary1, 8);
3448   addi(ary2, ary2, 8);
3449   cmpd(CCR0, chr1, chr2);
3450   bne(CCR0, Ldone);               // result is still 0 here.
3451   bdnz(Lfastloop);
3452 
3453   bind(Lskipfast);
3454   rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
3455   beq(CCR0, Lskiploop);
3456   mtctr(limit);
3457 
3458   // Character by character.
3459   bind(Lloop);
3460   if (is_byte) {
3461     lbz(chr1, 0, ary1);
3462     lbz(chr2, 0, ary2);
3463     addi(ary1, ary1, 1);
3464     addi(ary2, ary2, 1);
3465   } else {
3466     lhz(chr1, 0, ary1);
3467     lhz(chr2, 0, ary2);
3468     addi(ary1, ary1, 2);
3469     addi(ary2, ary2, 2);
3470   }
3471   cmpw(CCR0, chr1, chr2);
3472   bne(CCR0, Ldone);
3473   bdnz(Lloop);
3474 
3475   bind(Lskiploop);
3476   li(result, 1); // All characters are equal.
3477   bind(Ldone);
3478 }
3479 
     // Emits code that searches 'needle' in 'haystack'.
     //
     // 'result'        receives the index (in characters) of the first occurrence, or -1 if none.
     // 'haystack'      is the address of the haystack characters (only read).
     // 'haycnt'        is the haystack length in characters. Killed: the register is reused as last_addr.
     // 'needle'        is the address of the needle characters (only read).
     // 'needle_values' is not used by this implementation.
     // 'needlecnt'     is the needle length in characters if needlecntval == 0. Otherwise it is only
     //                 used as scratch for the loop count (written if needlecntval > 3).
     // 'needlecntval'  is the needle length if it is a compile time constant, 0 otherwise. Must not be 1.
     // 'ae'            is the argument encoding: LL, UU or UL (2 byte haystack, 1 byte needle).
     //                 LU is not supported.
     //
     // Kills tmp1..tmp4, R0, CTR, CCR0 and CCR1 (and CCR6 for a variable needle length).
3480 void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
3481                                     Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
3482                                     Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
3483 
3484   // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
3485   Label L_TooShort, L_Found, L_NotFound, L_End;
3486   Register last_addr = haycnt, // Kill haycnt at the beginning.
3487   addr      = tmp1,
3488   n_start   = tmp2,
3489   ch1       = tmp3,
3490   ch2       = R0;
3491 
3492   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
     // Character sizes in bytes of haystack (h_csize) and needle (n_csize).
3493   const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
3494   const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
3495 
3496   // **************************************************************************************************
3497   // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
3498   // **************************************************************************************************
3499 
3500   // Compute last haystack addr to use if no match gets found.
3501   clrldi(haycnt, haycnt, 32);         // Ensure positive int is valid as 64 bit value.
3502   addi(addr, haystack, -h_csize);     // Accesses use pre-increment.
3503   if (needlecntval == 0) { // variable needlecnt
3504    cmpwi(CCR6, needlecnt, 2);         // CCR6 is tested again at L_Comp1 (needlecnt == 2?).
3505    clrldi(needlecnt, needlecnt, 32);  // Ensure positive int is valid as 64 bit value.
3506    blt(CCR6, L_TooShort);             // Variable needlecnt: handle short needle separately.
3507   }
3508 
3509   if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
3510 
3511   if (needlecntval == 0) { // variable needlecnt
3512    subf(ch1, needlecnt, haycnt);      // Last character index to compare is haycnt-needlecnt.
3513    addi(needlecnt, needlecnt, -2);    // Rest of needle.
3514   } else { // constant needlecnt
3515   guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
3516   assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
3517    addi(ch1, haycnt, -needlecntval);  // Last character index to compare is haycnt-needlecnt.
3518    if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
3519   }
3520 
3521   if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
3522 
     // UL: widen the two 1 byte needle characters so they can be compared with two 2 byte haystack characters.
3523   if (ae ==StrIntrinsicNode::UL) {
3524    srwi(tmp4, n_start, 1*8);          // ___0
3525    rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
3526   }
3527 
3528   add(last_addr, haystack, ch1);      // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
3529 
3530   // Main Loop (now we have at least 2 characters).
3531   Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
3532   bind(L_OuterLoop); // Search for 1st 2 characters.
3533   Register addr_diff = tmp4;
3534    subf(addr_diff, addr, last_addr);  // Difference between already checked address and last address to check.
3535    addi(addr, addr, h_csize);         // This is the new address we want to use for comparing.
3536    srdi_(ch2, addr_diff, h_csize);    // Shift amount h_csize (1 or 2) turns the byte distance into a count of character pairs.
3537    beq(CCR0, L_FinalCheck);           // 2 characters left?
3538    mtctr(ch2);                        // num of characters / 2
3539   bind(L_InnerLoop);                  // Main work horse (2x unrolled search loop)
3540    if (h_csize == 2) {                // Load 2 characters of haystack (ignore alignment).
3541     lwz(ch1, 0, addr);
3542     lwz(ch2, 2, addr);
3543    } else {
3544     lhz(ch1, 0, addr);
3545     lhz(ch2, 1, addr);
3546    }
3547    cmpw(CCR0, ch1, n_start);          // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
3548    cmpw(CCR1, ch2, n_start);
3549    beq(CCR0, L_Comp1);                // Did we find the needle start?
3550    beq(CCR1, L_Comp2);
3551    addi(addr, addr, 2 * h_csize);
3552    bdnz(L_InnerLoop);
3553   bind(L_FinalCheck);
3554    andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
3555    beq(CCR0, L_NotFound);
3556    if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
3557    cmpw(CCR1, ch1, n_start);
3558    beq(CCR1, L_Comp1);
3559   bind(L_NotFound);
3560    li(result, -1);                    // not found
3561    b(L_End);
3562 
3563    // **************************************************************************************************
3564    // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
3565    // **************************************************************************************************
3566   if (needlecntval == 0) {           // We have to handle these cases separately.
3567   Label L_OneCharLoop;
3568   bind(L_TooShort);
3569    mtctr(haycnt);
3570    if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
3571   bind(L_OneCharLoop);
3572    if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
3573    cmpw(CCR1, ch1, n_start);
3574    beq(CCR1, L_Found);               // Did we find the one character needle?
3575    bdnz(L_OneCharLoop);
3576    li(result, -1);                   // Not found.
3577    b(L_End);
3578   }
3579 
3580   // **************************************************************************************************
3581   // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
3582   // **************************************************************************************************
3583 
3584   // Compare the rest
3585   bind(L_Comp2);
3586    addi(addr, addr, h_csize);        // First comparison has failed, 2nd one hit.
3587   bind(L_Comp1);                     // Addr points to possible needle start.
3588   if (needlecntval != 2) {           // Const needlecnt==2?
3589    if (needlecntval != 3) {
3590     if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
3591     Register n_ind = tmp4,
3592              h_ind = n_ind;
3593     li(n_ind, 2 * n_csize);          // First 2 characters are already compared, use index 2.
3594     mtctr(needlecnt);                // Decremented by 2, still > 0.
3595    Label L_CompLoop;
3596    bind(L_CompLoop);
3597     if (ae ==StrIntrinsicNode::UL) {
         // Haystack characters are twice as wide as needle characters: use a separate, doubled index.
3598       h_ind = ch1;
3599       sldi(h_ind, n_ind, 1);
3600     }
3601     if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
3602     if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
3603     cmpw(CCR1, ch1, ch2);
3604     bne(CCR1, L_OuterLoop);
3605     addi(n_ind, n_ind, n_csize);
3606     bdnz(L_CompLoop);
3607    } else { // No loop required if there's only one needle character left.
3608     if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
3609     if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
3610     cmpw(CCR1, ch1, ch2);
3611     bne(CCR1, L_OuterLoop);
3612    }
3613   }
3614   // Return index ...
3615   bind(L_Found);
3616    subf(result, haystack, addr);     // relative to haystack, ...
3617    if (h_csize == 2) { srdi(result, result, 1); } // in characters.
3618   bind(L_End);
3619 } // string_indexof
3620 
     // Emits code that searches a single character in 'haystack'.
     //
     // 'result'     receives the index (in characters) of the first occurrence, or -1 if none.
     // 'haystack'   is the address of the haystack characters (only read).
     // 'haycnt'     is the number of haystack characters (only read).
     // 'needle'     is either the register holding the character to search for or R0.
     // 'needleChar' is the character to search for if 'needle' == R0.
     // 'is_byte'    selects a haystack of 1 byte (true) or 2 byte (false) characters.
     //
     // Kills tmp1, tmp2, R0, CTR, CCR0 and CCR1.
     //
     // NOTE(review): 'result' is not part of the assert_different_registers check below. It is only
     // written after the last read of the input registers.
3621 void MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
3622                                          Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
3623   assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
3624 
3625   Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
3626   Register addr = tmp1,
3627            ch1 = tmp2,
3628            ch2 = R0;
3629 
3630   const int h_csize = is_byte ? 1 : 2;
3631 
3632 //4:
3633    srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
3634    mr(addr, haystack);
3635    beq(CCR0, L_FinalCheck);
3636    mtctr(tmp2);              // Move to count register.
3637 //8:
3638   bind(L_InnerLoop);         // Main work horse (2x unrolled search loop).
3639    if (!is_byte) {
3640     lhz(ch1, 0, addr);
3641     lhz(ch2, 2, addr);
3642    } else {
3643     lbz(ch1, 0, addr);
3644     lbz(ch2, 1, addr);
3645    }
3646    (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
3647    (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
3648    beq(CCR0, L_Found1);      // Did we find the needle?
3649    beq(CCR1, L_Found2);
3650    addi(addr, addr, 2 * h_csize);
3651    bdnz(L_InnerLoop);
3652 //16:
3653   bind(L_FinalCheck);
3654    andi_(R0, haycnt, 1);     // Odd number of characters?
3655    beq(CCR0, L_NotFound);
3656    if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
3657    (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
3658    beq(CCR1, L_Found1);
3659 //21:
3660   bind(L_NotFound);
3661    li(result, -1);           // Not found.
3662    b(L_End);
3663 
3664   bind(L_Found2);
3665    addi(addr, addr, h_csize); // 2nd character of the pair matched.
3666 //24:
3667   bind(L_Found1);            // Return index ...
3668    subf(result, haystack, addr); // relative to haystack, ...
3669    if (!is_byte) { srdi(result, result, 1); } // in characters.
3670   bind(L_End);
3671 } // string_indexof_char
3672 
3673 
     // Emits code that checks whether any of the 'cnt' bytes starting at 'src' has its most
     // significant bit (0x80) set.
     //
     // 'src'    is the address of the first byte. It is advanced by the emitted code.
     // 'cnt'    is the number of bytes to check (only read).
     // 'result' is set to 1 if such a byte was found, 0 otherwise.
     //
     // Kills tmp1, tmp2, R0, CTR and CCR0.
3674 void MacroAssembler::has_negatives(Register src, Register cnt, Register result,
3675                                    Register tmp1, Register tmp2) {
3676   const Register tmp0 = R0;
3677   assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
3678   Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
3679 
3680   // Check if cnt >= 16 (bytes)
3681   lis(tmp1, (int)(short)0x8080);  // tmp1 = 0x8080808080808080 (completed by ori/rldimi below)
3682   srwi_(tmp2, cnt, 4);            // tmp2 = number of 16 byte groups, sets CCR0.
3683   li(result, 1);                  // Assume there's a negative byte.
3684   beq(CCR0, Lslow);
3685   ori(tmp1, tmp1, 0x8080);
3686   rldimi(tmp1, tmp1, 32, 0);
3687   mtctr(tmp2);
3688 
3689   // 2x unrolled loop
3690   bind(Lfastloop);
3691   ld(tmp2, 0, src);
3692   ld(tmp0, 8, src);
3693 
3694   orr(tmp0, tmp2, tmp0);          // Combine all 16 bytes for a single check.
3695 
3696   and_(tmp0, tmp0, tmp1);
3697   bne(CCR0, Ldone);               // Found negative byte.
3698   addi(src, src, 16);
3699 
3700   bdnz(Lfastloop);
3701 
3702   bind(Lslow);                    // Fallback to slow version
3703   rldicl_(tmp0, cnt, 0, 64-4);    // Remaining bytes: cnt & 15.
3704   beq(CCR0, Lnoneg);
3705   mtctr(tmp0);
3706   bind(Lloop);
3707   lbz(tmp0, 0, src);
3708   addi(src, src, 1);
3709   andi_(tmp0, tmp0, 0x80);
3710   bne(CCR0, Ldone);               // Found negative byte.
3711   bdnz(Lloop);
3712   bind(Lnoneg);
3713   li(result, 0);
3714 
3715   bind(Ldone);
3716 }
3717 
3718 
3719 // Intrinsics for non-CompactStrings
3720 
3721 // Search for a single jchar in a jchar[].
3722 //
3723 // Assumes that result differs from all other registers.
3724 //
3725 // 'haystack' is the address of a jchar-array.
3726 // 'needle' is either the character to search for or R0.
3727 // 'needleChar' is the character to search for if 'needle' == R0.
3728 // 'haycnt' is the length of the haystack. We assume 'haycnt' >=1.
3729 //
3730 // Preserves haystack, haycnt, needle and kills all other registers.
3731 //
3732 // If needle == R0, we search for the constant needleChar.
3733 void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
3734                                       Register needle, jchar needleChar,
3735                                       Register tmp1, Register tmp2) {
3736 
3737   assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
3738 
3739   Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
3740   Register addr = tmp1,


4146     //14:
4147     if (cntval & 2) {
4148       lwzx(R0, str1_reg, index_reg);
4149       lwzx(tmp2_reg, str2_reg, index_reg);
4150       cmpw(CCR0, R0, tmp2_reg);
4151       bne(CCR0, Ldone_false);
4152       if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
4153     }
4154     if (cntval & 1) {
4155       lhzx(R0, str1_reg, index_reg);
4156       lhzx(tmp2_reg, str2_reg, index_reg);
4157       cmpw(CCR0, R0, tmp2_reg);
4158       bne(CCR0, Ldone_false);
4159     }
4160     // fallthru: true
4161   }
4162   li(result_reg, 1);
4163   bind(Ldone_false);
4164 }
4165 
4166 #endif // Compiler2
4167 
4168 // Helpers for Intrinsic Emitters
4169 //
4170 // Reverse the byte order of a 32bit value in a register
4171 //   src: 0x44556677
4172 //   dst: 0x77665544
4173 // Three steps to obtain the result:
4174 //  1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word
4175 //     into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared.
4176 //     This value initializes dst.
4177 //  2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost
4178 //     byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go.
4179 //     This value is mask inserted into dst with a [0..23] mask of 1s.
4180 //  3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position.
4181 //     This value is mask inserted into dst with a [8..15] mask of 1s.
4182 void MacroAssembler::load_reverse_32(Register dst, Register src) {
4183   assert_different_registers(dst, src);
4184 
4185   rldicl(dst, src, (4+1)*8, 56);       // Rotate byte 4 into position 7 (rightmost), clear all to the left.
4186   rlwimi(dst, src,     3*8,  0, 23);   // Insert byte 5 into position 6, 7 into 4, leave pos 7 alone.
4187   rlwimi(dst, src,     1*8,  8, 15);   // Insert byte 6 into position 5, leave the rest alone.


< prev index next >