28 #include "compiler/disassembler.hpp"
29 #include "gc/shared/cardTableModRefBS.hpp"
30 #include "gc/shared/collectedHeap.inline.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "nativeInst_ppc.hpp"
34 #include "prims/methodHandles.hpp"
35 #include "runtime/biasedLocking.hpp"
36 #include "runtime/icache.hpp"
37 #include "runtime/interfaceSupport.hpp"
38 #include "runtime/objectMonitor.hpp"
39 #include "runtime/os.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "utilities/macros.hpp"
43 #if INCLUDE_ALL_GCS
44 #include "gc/g1/g1CollectedHeap.inline.hpp"
45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
46 #include "gc/g1/heapRegion.hpp"
47 #endif // INCLUDE_ALL_GCS
48
// BLOCK_COMMENT(str): expands to block_comment(str) unless PRODUCT is defined,
// in which case it expands to nothing.
49 #ifdef PRODUCT
50 #define BLOCK_COMMENT(str) // nothing
51 #else
52 #define BLOCK_COMMENT(str) block_comment(str)
53 #endif
// BIND(label): binds the label, then passes the label's name followed by ':' to BLOCK_COMMENT.
// NOTE: expands to two statements, so do not use it as the unbraced body of an if/else/loop.
54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
55
56 #ifdef ASSERT
57 // On RISC, there's no benefit to verifying instruction boundaries.
// Only compiled when ASSERT is defined; unconditionally returns false.
58 bool AbstractAssembler::pd_check_instruction_mark() { return false; }
59 #endif
60
61 void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
62 assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
63 if (Assembler::is_simm(si31, 16)) {
64 ld(d, si31, a);
65 if (emit_filler_nop) nop();
66 } else {
67 const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);
3151 addi(base_ptr, base_ptr, cl_size);
3152 bdnz(fastloop);
3153 if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
3154 //20:
3155 bind(small_rest);
3156 cmpdi(CCR0, cnt_dwords, 0); // size 0?
3157 beq(CCR0, done); // rest == 0
3158 li(tmp, 0);
3159 mtctr(cnt_dwords); // Load counter.
3160 //24:
3161 bind(restloop); // Clear rest.
3162 std(tmp, 0, base_ptr); // Clear 8byte aligned block.
3163 addi(base_ptr, base_ptr, 8);
3164 bdnz(restloop);
3165 //27:
3166 bind(done);
3167 }
3168
3169 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3170
3171 // Search for a single jchar in a jchar[].
3172 //
3173 // Assumes that result differs from all other registers.
3174 //
3175 // 'haystack' is the address of a jchar-array.
3176 // 'needle' is either the character to search for or R0.
3177 // 'needleChar' is the character to search for if 'needle' == R0.
3178 // 'haycnt' is the length of the haystack. We assume 'haycnt' >=1.
3179 //
3180 // Preserves haystack, haycnt, needle and kills all other registers.
3181 //
3182 // If needle == R0, we search for the constant needleChar.
3183 void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
3184 Register needle, jchar needleChar,
3185 Register tmp1, Register tmp2) {
3186
3187 assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
3188
3189 Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
3190 Register addr = tmp1,
3596 //14:
3597 if (cntval & 2) {
3598 lwzx(R0, str1_reg, index_reg);
3599 lwzx(tmp2_reg, str2_reg, index_reg);
3600 cmpw(CCR0, R0, tmp2_reg);
3601 bne(CCR0, Ldone_false);
3602 if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
3603 }
3604 if (cntval & 1) {
3605 lhzx(R0, str1_reg, index_reg);
3606 lhzx(tmp2_reg, str2_reg, index_reg);
3607 cmpw(CCR0, R0, tmp2_reg);
3608 bne(CCR0, Ldone_false);
3609 }
3610 // fallthru: true
3611 }
3612 li(result_reg, 1);
3613 bind(Ldone_false);
3614 }
3615
3616 // Helpers for Intrinsic Emitters
3617 //
3618 // Reverse the byte order of a 32bit value in a register
3619 // src: 0x44556677
3620 // dst: 0x77665544
3621 // Three steps to obtain the result:
3622 // 1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word
3623 // into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared.
3624 // This value initializes dst.
3625 // 2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost
3626 // byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go.
3627 // This value is mask inserted into dst with a [0..23] mask of 1s.
3628 // 3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position.
3629 // This value is mask inserted into dst with a [8..15] mask of 1s.
3630 void MacroAssembler::load_reverse_32(Register dst, Register src) {
3631 assert_different_registers(dst, src);
3632
3633 rldicl(dst, src, (4+1)*8, 56); // Rotate byte 4 into position 7 (rightmost), clear all to the left.
3634 rlwimi(dst, src, 3*8, 0, 23); // Insert byte 5 into position 6, 7 into 4, leave pos 7 alone.
3635 rlwimi(dst, src, 1*8, 8, 15); // Insert byte 6 into position 5, leave the rest alone.
|
28 #include "compiler/disassembler.hpp"
29 #include "gc/shared/cardTableModRefBS.hpp"
30 #include "gc/shared/collectedHeap.inline.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "nativeInst_ppc.hpp"
34 #include "prims/methodHandles.hpp"
35 #include "runtime/biasedLocking.hpp"
36 #include "runtime/icache.hpp"
37 #include "runtime/interfaceSupport.hpp"
38 #include "runtime/objectMonitor.hpp"
39 #include "runtime/os.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "utilities/macros.hpp"
43 #if INCLUDE_ALL_GCS
44 #include "gc/g1/g1CollectedHeap.inline.hpp"
45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
46 #include "gc/g1/heapRegion.hpp"
47 #endif // INCLUDE_ALL_GCS
48 #ifdef COMPILER2
49 #include "opto/intrinsicnode.hpp"
50 #endif
51
// BLOCK_COMMENT(str): expands to block_comment(str) unless PRODUCT is defined,
// in which case it expands to nothing.
52 #ifdef PRODUCT
53 #define BLOCK_COMMENT(str) // nothing
54 #else
55 #define BLOCK_COMMENT(str) block_comment(str)
56 #endif
// BIND(label): binds the label, then passes the label's name followed by ':' to BLOCK_COMMENT.
// NOTE: expands to two statements, so do not use it as the unbraced body of an if/else/loop.
57 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
58
59 #ifdef ASSERT
60 // On RISC, there's no benefit to verifying instruction boundaries.
// Only compiled when ASSERT is defined; unconditionally returns false.
61 bool AbstractAssembler::pd_check_instruction_mark() { return false; }
62 #endif
63
64 void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
65 assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
66 if (Assembler::is_simm(si31, 16)) {
67 ld(d, si31, a);
68 if (emit_filler_nop) nop();
69 } else {
70 const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);
3154 addi(base_ptr, base_ptr, cl_size);
3155 bdnz(fastloop);
3156 if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
3157 //20:
3158 bind(small_rest);
3159 cmpdi(CCR0, cnt_dwords, 0); // size 0?
3160 beq(CCR0, done); // rest == 0
3161 li(tmp, 0);
3162 mtctr(cnt_dwords); // Load counter.
3163 //24:
3164 bind(restloop); // Clear rest.
3165 std(tmp, 0, base_ptr); // Clear 8byte aligned block.
3166 addi(base_ptr, base_ptr, 8);
3167 bdnz(restloop);
3168 //27:
3169 bind(done);
3170 }
3171
3172 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
3173
3174 #ifdef COMPILER2
3175 // Intrinsics for CompactStrings
3176
3177 // Compress char[] to byte[] by compressing 16 bytes at once.
//
// Emits a CTR-counted loop that runs (cnt >> 3) times. Each iteration loads 16 bytes
// from src (two ld), packs the low byte of each 16-bit element and stores 8 bytes to
// dst (two stw), then advances src by 16 and dst by 8. If (cnt >> 3) == 0 the loop is
// skipped and control falls through to the end of the emitted sequence.
//
// src, dst:   advanced past the data processed by the loop.
// cnt:        element count; not modified here. The (cnt & 7) leftover elements are not
//             handled by this code (presumably the caller follows up with the
//             element-wise variant - confirm at call sites).
// Lfailure:   branch target taken when any loaded element has bits set outside
//             0x00FF (not latin1). The check happens before src is advanced and
//             before anything is stored for that iteration.
//
// Kills R0 (tmp0), tmp1..tmp5, CR0 and CTR.
// The byte-lane comments below describe the big-endian layout.
3178 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
3179 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
3180 Label& Lfailure) {
3181
3182 const Register tmp0 = R0;
3183 assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3184 Label Lloop, Lslow;
3185
3186 // Check if cnt >= 8 (= 16 bytes)
     // The mask is built in three steps (lis, ori, rldimi); the first step is issued
     // before the branch, the remaining two only on the loop path.
3187 lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF00FF00FF
3188 srwi_(tmp2, cnt, 3);
3189 beq(CCR0, Lslow);
3190 ori(tmp1, tmp1, 0xFF);
3191 rldimi(tmp1, tmp1, 32, 0);
3192 mtctr(tmp2);
3193
3194 // 2x unrolled loop
3195 bind(Lloop);
3196 ld(tmp2, 0, src); // _0_1_2_3 (Big Endian)
3197 ld(tmp4, 8, src); // _4_5_6_7
3198
     // tmp0 collects all bits of both doublewords for the latin1 check below.
3199 orr(tmp0, tmp2, tmp4);
3200 rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
3201 rldimi(tmp2, tmp2, 2*8, 2*8); // _0_2_3_3
3202 rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
3203 rldimi(tmp4, tmp4, 2*8, 2*8); // _4_6_7_7
3204
     // Any bit outside the 0x00FF.. mask means a non-latin1 element.
3205 andc_(tmp0, tmp0, tmp1);
3206 bne(CCR0, Lfailure); // Not latin1.
3207 addi(src, src, 16);
3208
3209 rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
3210 srdi(tmp2, tmp2, 3*8); // ____0_2_
3211 rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
3212 srdi(tmp4, tmp4, 3*8); // ____4_6_
3213
3214 orr(tmp2, tmp2, tmp3); // ____0123
3215 orr(tmp4, tmp4, tmp5); // ____4567
3216
3217 stw(tmp2, 0, dst);
3218 stw(tmp4, 4, dst);
3219 addi(dst, dst, 8);
3220 bdnz(Lloop);
3221
3222 bind(Lslow); // Fallback to slow version
3223 }
3224
3225 // Compress char[] to byte[]. cnt must be positive int.
//
// Emits an element-wise loop: load a halfword from src, branch to Lfailure if it is
// greater than 0xff (unsigned compare), otherwise store its low byte to dst.
// The loop counter is loaded from cnt via CTR, hence the requirement that cnt is positive.
//
// src, dst:  advanced by 2 resp. 1 per stored element. On the branch to Lfailure both
//            still point at the offending element / its destination slot.
// cnt:       number of elements; not modified.
// tmp:       scratch, killed. CR0 and CTR are killed as well.
3226 void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
3227 Label Lloop;
3228 mtctr(cnt);
3229
3230 bind(Lloop);
3231 lhz(tmp, 0, src);
3232 cmplwi(CCR0, tmp, 0xff);
3233 bgt(CCR0, Lfailure); // Not latin1.
3234 addi(src, src, 2);
3235 stb(tmp, 0, dst);
3236 addi(dst, dst, 1);
3237 bdnz(Lloop);
3238 }
3239
3240 // Inflate byte[] to char[], producing 16 bytes of output (from 8 input bytes) at once.
//
// Emits a CTR-counted loop that runs (cnt >> 3) times. Each iteration loads 8 bytes
// from src (two lwz), spreads every byte into a zero-extended 16-bit element and stores
// 16 bytes to dst (two std), advancing src by 8 and dst by 16. If (cnt >> 3) == 0 the
// loop is skipped and control falls through to the end of the emitted sequence.
//
// src, dst:  advanced past the data processed by the loop.
// cnt:       element count; not modified here. The (cnt & 7) leftover elements are not
//            handled by this code (presumably the caller follows up with the
//            element-wise variant - confirm at call sites).
//
// Kills R0 (tmp0), tmp1..tmp5, CR0 and CTR.
// The byte-lane comments below describe the big-endian layout.
3241 void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
3242 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
3243 const Register tmp0 = R0;
3244 assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
3245 Label Lloop, Lslow;
3246
3247 // Check if cnt >= 8
3248 srwi_(tmp2, cnt, 3);
3249 beq(CCR0, Lslow);
3250 lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF
3251 ori(tmp1, tmp1, 0xFF);
3252 mtctr(tmp2);
3253
3254 // 2x unrolled loop
3255 bind(Lloop);
3256 lwz(tmp2, 0, src); // ____0123 (Big Endian)
3257 lwz(tmp4, 4, src); // ____4567
3258 addi(src, src, 8);
3259
3260 rldicl(tmp3, tmp2, 7*8, 64-8); // _______2
3261 rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
3262 rldicl(tmp5, tmp4, 7*8, 64-8); // _______6
3263 rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
3264
     // andc with the 0x00FF00FF mask keeps the bytes in the odd (high) lanes.
3265 andc(tmp0, tmp2, tmp1); // ____0_1_
3266 rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
3267 andc(tmp3, tmp4, tmp1); // ____4_5_
3268 rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
3269
3270 rldimi(tmp2, tmp0, 3*8, 0*8); // _0_1_2_3
3271 rldimi(tmp4, tmp3, 3*8, 0*8); // _4_5_6_7
3272
3273 std(tmp2, 0, dst);
3274 std(tmp4, 8, dst);
3275 addi(dst, dst, 16);
3276 bdnz(Lloop);
3277
3278 bind(Lslow); // Fallback to slow version
3279 }
3280
3281 // Inflate byte[] to char[]. cnt must be positive int.
//
// Emits an element-wise loop: load one byte from src (zero-extended by lbz) and store it
// as a halfword to dst. The loop counter is loaded from cnt via CTR, hence the
// requirement that cnt is positive.
//
// src, dst:  advanced by 1 resp. 2 per element.
// cnt:       number of elements; not modified.
// tmp:       scratch, killed. CTR is killed as well.
3282 void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
3283 Label Lloop;
3284 mtctr(cnt);
3285
3286 bind(Lloop);
3287 lbz(tmp, 0, src);
3288 addi(src, src, 1);
3289 sth(tmp, 0, dst);
3290 addi(dst, dst, 2);
3291 bdnz(Lloop);
3292 }
3293
// Emits code that compares two strings and leaves a compareTo-style value in result:
// the difference of the first mismatching characters (chr1 - chr2), or, if the strings
// agree up to the shorter length, the length difference (cnt1 - cnt2, after scaling).
//
// str1, str2:  addresses of the character data; advanced while comparing (killed).
// cnt1, cnt2:  lengths; killed. cnt1 is halved for UU, cnt2 is halved unless LL, otherwise
//              they are only zero-extended - this suggests the incoming values are byte
//              counts (confirm against the caller).
// tmp1:        scratch; holds the length difference.
// result:      output; also used as scratch (chr1) during the comparison.
// ae:          StrIntrinsicNode encoding (LL, UU, LU, UL). For UL the caller is expected
//              to pass the operands interchanged; the result is negated at the end
//              (see the note in the body).
//
// Kills R0 (tmp0), CR0 and CTR in addition to the registers listed above.
3294 void MacroAssembler::string_compare(Register str1, Register str2,
3295 Register cnt1, Register cnt2,
3296 Register tmp1, Register result, int ae) {
3297 const Register tmp0 = R0,
3298 diff = tmp1;
3299
3300 assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
3301 Label Ldone, Lslow, Lloop, Lreturn_diff;
3302
3303 // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
3304 // we interchange str1 and str2 in the UL case and negate the result.
3305 // Like this, str1 is always latin1 encoded, except for the UU case.
3306 // In addition, we need 0 (or sign which is 0) extend.
3307
3308 if (ae == StrIntrinsicNode::UU) {
3309 srwi(cnt1, cnt1, 1);
3310 } else {
3311 clrldi(cnt1, cnt1, 32);
3312 }
3313
3314 if (ae != StrIntrinsicNode::LL) {
3315 srwi(cnt2, cnt2, 1);
3316 } else {
3317 clrldi(cnt2, cnt2, 32);
3318 }
3319
3320 // See if the lengths are different, and calculate min in cnt1.
3321 // Save diff in case we need it for a tie-breaker.
3322 subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
3323 // if (diff > 0) { cnt1 = cnt2; }
     // The branch variant also copies when diff == 0, which is harmless (cnt1 == cnt2).
3324 if (VM_Version::has_isel()) {
3325 isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
3326 } else {
3327 Label Lskip;
3328 blt(CCR0, Lskip);
3329 mr(cnt1, cnt2);
3330 bind(Lskip);
3331 }
3332
3333 // Rename registers
3334 Register chr1 = result;
3335 Register chr2 = tmp0;
3336
3337 // Compare multiple characters in fast loop (only implemented for same encoding).
     // stride1/stride2 are code-generation-time values: 8 (bytes) for the fast loop,
     // reassigned to the per-character size before the slow loop is emitted.
3338 int stride1 = 8, stride2 = 8;
3339 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3340 int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
3341 Label Lfastloop, Lskipfast;
3342
3343 srwi_(tmp0, cnt1, log2_chars_per_iter);
3344 beq(CCR0, Lskipfast);
3345 rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
3346 li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
3347 mtctr(tmp0);
3348
3349 bind(Lfastloop);
3350 ld(chr1, 0, str1);
3351 ld(chr2, 0, str2);
3352 cmpd(CCR0, chr1, chr2);
     // On mismatch str1/str2 have not been advanced yet, so the slow loop rescans
     // exactly the 8 bytes that differed (cnt1 characters).
3353 bne(CCR0, Lslow);
3354 addi(str1, str1, stride1);
3355 addi(str2, str2, stride2);
3356 bdnz(Lfastloop);
3357 mr(cnt1, cnt2); // Remaining characters.
3358 bind(Lskipfast);
3359 }
3360
3361 // Loop which searches the first difference character by character.
3362 cmpwi(CCR0, cnt1, 0);
3363 beq(CCR0, Lreturn_diff);
3364 bind(Lslow);
3365 mtctr(cnt1);
3366
3367 switch (ae) {
3368 case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
3369 case StrIntrinsicNode::UL: // fallthru (see comment above)
3370 case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
3371 case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
3372 default: ShouldNotReachHere(); break;
3373 }
3374
3375 bind(Lloop);
3376 if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
3377 if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
3378 subf_(result, chr2, chr1); // result = chr1 - chr2
3379 bne(CCR0, Ldone);
3380 addi(str1, str1, stride1);
3381 addi(str2, str2, stride2);
3382 bdnz(Lloop);
3383
3384 // If strings are equal up to min length, return the length difference.
3385 bind(Lreturn_diff);
3386 mr(result, diff);
3387
3388 // Otherwise, return the difference between the first mismatched chars.
3389 bind(Ldone);
3390 if (ae == StrIntrinsicNode::UL) {
3391 neg(result, result); // Negate result (see note above).
3392 }
3393 }
3394
// Emits code that compares two arrays / character ranges for equality and sets
// result to 1 if they are equal, 0 otherwise.
//
// is_array_equ:  true:  ary1/ary2 are array references. Identical references compare
//                       equal, a NULL reference or differing lengths compare unequal;
//                       the length is loaded into limit and ary1/ary2 are advanced to
//                       the first element.
//                false: ary1/ary2 already address the data and limit is supplied by the
//                       caller. For non-byte data limit is shifted right by one extra bit
//                       before use, which suggests it is a byte count in this mode
//                       (confirm against the caller).
// is_byte:       element size selector (1-byte vs. 2-byte elements).
// tmp1:          scratch.
//
// Kills ary1, ary2, limit, tmp1, R0 (tmp0), CR0 and CTR; CR1 as well if is_array_equ.
3395 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
3396 Register limit, Register tmp1, Register result, bool is_byte) {
3397 const Register tmp0 = R0;
3398 assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
3399 Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
3400 bool limit_needs_shift = false;
3401
3402 if (is_array_equ) {
3403 const int length_offset = arrayOopDesc::length_offset_in_bytes();
3404 const int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
3405
3406 // Return true if the same array.
3407 cmpd(CCR0, ary1, ary2);
3408 beq(CCR0, Lskiploop);
3409
3410 // Return false if one of them is NULL.
3411 cmpdi(CCR0, ary1, 0);
3412 cmpdi(CCR1, ary2, 0);
3413 li(result, 0);
     // Combine both NULL checks into CR0.eq so a single branch suffices.
3414 cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
3415 beq(CCR0, Ldone);
3416
3417 // Load the lengths of arrays.
3418 lwz(limit, length_offset, ary1);
3419 lwz(tmp0, length_offset, ary2);
3420
3421 // Return false if the two arrays are not equal length.
3422 cmpw(CCR0, limit, tmp0);
3423 bne(CCR0, Ldone);
3424
3425 // Load array addresses.
3426 addi(ary1, ary1, base_offset);
3427 addi(ary2, ary2, base_offset);
3428 } else {
3429 limit_needs_shift = !is_byte;
3430 li(result, 0); // Assume not equal.
3431 }
3432
3433 // Rename registers
3434 Register chr1 = tmp0;
3435 Register chr2 = tmp1;
3436
3437 // Compare 8 bytes per iteration in fast loop.
3438 const int log2_chars_per_iter = is_byte ? 3 : 2;
3439
3440 srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
3441 beq(CCR0, Lskipfast);
3442 mtctr(tmp0);
3443
3444 bind(Lfastloop);
3445 ld(chr1, 0, ary1);
3446 ld(chr2, 0, ary2);
3447 addi(ary1, ary1, 8);
3448 addi(ary2, ary2, 8);
3449 cmpd(CCR0, chr1, chr2);
3450 bne(CCR0, Ldone);
3451 bdnz(Lfastloop);
3452
3453 bind(Lskipfast);
     // Rotating left by 63 is a rotate right by 1 (applies the pending shift of limit);
     // the mask then keeps only the low log2_chars_per_iter bits.
3454 rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
3455 beq(CCR0, Lskiploop);
3456 mtctr(limit);
3457
3458 // Character by character.
3459 bind(Lloop);
3460 if (is_byte) {
3461 lbz(chr1, 0, ary1);
3462 lbz(chr2, 0, ary2);
3463 addi(ary1, ary1, 1);
3464 addi(ary2, ary2, 1);
3465 } else {
3466 lhz(chr1, 0, ary1);
3467 lhz(chr2, 0, ary2);
3468 addi(ary1, ary1, 2);
3469 addi(ary2, ary2, 2);
3470 }
3471 cmpw(CCR0, chr1, chr2);
3472 bne(CCR0, Ldone);
3473 bdnz(Lloop);
3474
3475 bind(Lskiploop);
3476 li(result, 1); // All characters are equal.
3477 bind(Ldone);
3478 }
3479
// Emits code that searches for a needle (a sequence of characters) in a haystack and
// sets result to the index (in characters) of the first occurrence, or -1 if none.
//
// haystack:      address of the haystack data; preserved (used to compute the index).
// haycnt:        haystack length; killed (reused as last_addr).
// needle:        address of the needle data.
// needle_values: not referenced in this function body.
// needlecnt:     needle length register; modified. Only read as an input if
//                needlecntval == 0.
// needlecntval:  0 means the needle length is variable (taken from needlecnt);
//                otherwise the constant needle length, which must not be 1 (guarantee).
// tmp1..tmp4:    scratch, killed.
// ae:            StrIntrinsicNode encoding; LU is rejected by an assert. The haystack has
//                1-byte characters only for LL, the needle has 2-byte characters only
//                for UU.
//
// Kills R0 (ch2), CR0, CR1 and CTR; CR6 as well when the needle length is variable
// (CR6 keeps the result of "needlecnt compared with 2" across the search).
3480 void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
3481 Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
3482 Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
3483
3484 // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
3485 Label L_TooShort, L_Found, L_NotFound, L_End;
3486 Register last_addr = haycnt, // Kill haycnt at the beginning.
3487 addr = tmp1,
3488 n_start = tmp2,
3489 ch1 = tmp3,
3490 ch2 = R0;
3491
3492 assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
     // Character sizes in bytes for the haystack (h_csize) and the needle (n_csize).
3493 const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
3494 const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
3495
3496 // **************************************************************************************************
3497 // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
3498 // **************************************************************************************************
3499
3500 // Compute last haystack addr to use if no match gets found.
3501 clrldi(haycnt, haycnt, 32); // Ensure positive int is valid as 64 bit value.
3502 addi(addr, haystack, -h_csize); // Accesses use pre-increment.
3503 if (needlecntval == 0) { // variable needlecnt
3504 cmpwi(CCR6, needlecnt, 2);
3505 clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value.
3506 blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately.
3507 }
3508
3509 if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
3510
3511 if (needlecntval == 0) { // variable needlecnt
3512 subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt.
3513 addi(needlecnt, needlecnt, -2); // Rest of needle.
3514 } else { // constant needlecnt
3515 guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
3516 assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
3517 addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
3518 if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
3519 }
3520
3521 if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
3522
     // UL: widen the two 1-byte needle characters to 2-byte lanes so they can be
     // compared directly against two 2-byte haystack characters.
3523 if (ae ==StrIntrinsicNode::UL) {
3524 srwi(tmp4, n_start, 1*8); // ___0
3525 rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
3526 }
3527
3528 add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
3529
3530 // Main Loop (now we have at least 2 characters).
3531 Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
3532 bind(L_OuterLoop); // Search for 1st 2 characters.
3533 Register addr_diff = tmp4;
3534 subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
3535 addi(addr, addr, h_csize); // This is the new address we want to use for comparing.
     // addr_diff is in bytes; shifting right by h_csize (1 or 2) divides it by 2*h_csize,
     // i.e. yields the number of 2x unrolled iterations.
3536 srdi_(ch2, addr_diff, h_csize);
3537 beq(CCR0, L_FinalCheck); // 2 characters left?
3538 mtctr(ch2); // num of characters / 2
3539 bind(L_InnerLoop); // Main work horse (2x unrolled search loop)
3540 if (h_csize == 2) { // Load 2 characters of haystack (ignore alignment).
3541 lwz(ch1, 0, addr);
3542 lwz(ch2, 2, addr);
3543 } else {
3544 lhz(ch1, 0, addr);
3545 lhz(ch2, 1, addr);
3546 }
3547 cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
3548 cmpw(CCR1, ch2, n_start);
3549 beq(CCR0, L_Comp1); // Did we find the needle start?
3550 beq(CCR1, L_Comp2);
3551 addi(addr, addr, 2 * h_csize);
3552 bdnz(L_InnerLoop);
3553 bind(L_FinalCheck);
     // The bit with value h_csize in the byte difference tells whether an odd position remains.
3554 andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
3555 beq(CCR0, L_NotFound);
3556 if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
3557 cmpw(CCR1, ch1, n_start);
3558 beq(CCR1, L_Comp1);
3559 bind(L_NotFound);
3560 li(result, -1); // not found
3561 b(L_End);
3562
3563 // **************************************************************************************************
3564 // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
3565 // **************************************************************************************************
3566 if (needlecntval == 0) { // We have to handle these cases separately.
3567 Label L_OneCharLoop;
3568 bind(L_TooShort);
     // haycnt is still the character count here: L_TooShort is reached before
     // last_addr (same register) is computed.
3569 mtctr(haycnt);
3570 if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
3571 bind(L_OneCharLoop);
3572 if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
3573 cmpw(CCR1, ch1, n_start);
3574 beq(CCR1, L_Found); // Did we find the one character needle?
3575 bdnz(L_OneCharLoop);
3576 li(result, -1); // Not found.
3577 b(L_End);
3578 }
3579
3580 // **************************************************************************************************
3581 // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
3582 // **************************************************************************************************
3583
3584 // Compare the rest
3585 bind(L_Comp2);
3586 addi(addr, addr, h_csize); // First comparison has failed, 2nd one hit.
3587 bind(L_Comp1); // Addr points to possible needle start.
3588 if (needlecntval != 2) { // Const needlecnt==2?
3589 if (needlecntval != 3) {
     // CR6 still holds the result of the initial "needlecnt compared with 2".
3590 if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
3591 Register n_ind = tmp4,
3592 h_ind = n_ind;
3593 li(n_ind, 2 * n_csize); // First 2 characters are already compared, use index 2.
3594 mtctr(needlecnt); // Decremented by 2, still > 0.
3595 Label L_CompLoop;
3596 bind(L_CompLoop);
     // UL: haystack characters are twice as wide as needle characters, so the
     // haystack byte index is the needle byte index times 2 (kept in ch1).
3597 if (ae ==StrIntrinsicNode::UL) {
3598 h_ind = ch1;
3599 sldi(h_ind, n_ind, 1);
3600 }
3601 if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
3602 if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
3603 cmpw(CCR1, ch1, ch2);
3604 bne(CCR1, L_OuterLoop);
3605 addi(n_ind, n_ind, n_csize);
3606 bdnz(L_CompLoop);
3607 } else { // No loop required if there's only one needle character left.
3608 if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
3609 if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
3610 cmpw(CCR1, ch1, ch2);
3611 bne(CCR1, L_OuterLoop);
3612 }
3613 }
3614 // Return index ...
3615 bind(L_Found);
3616 subf(result, haystack, addr); // relative to haystack, ...
3617 if (h_csize == 2) { srdi(result, result, 1); } // in characters.
3618 bind(L_End);
3619 } // string_indexof
3620
// Emits code that searches for a single character in a haystack and sets result to the
// index (in characters) of its first occurrence, or -1 if it does not occur.
//
// haystack:    address of the haystack data; not modified.
// haycnt:      number of characters in the haystack; not modified. A count of 0 goes
//              straight to the final check and yields -1.
// needle:      register holding the character to search for, or R0 to search for the
//              constant needleChar instead.
// needleChar:  constant character, only used if needle == R0.
// tmp1, tmp2:  scratch, killed.
// is_byte:     true for 1-byte characters, false for 2-byte characters.
//
// Note: result is not part of the assert_different_registers check; it is only written
// at the very end.
// Kills R0 (ch2), CR0, CR1 and CTR.
3621 void MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
3622 Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
3623 assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
3624
3625 Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
3626 Register addr = tmp1,
3627 ch1 = tmp2,
3628 ch2 = R0;
3629
3630 const int h_csize = is_byte ? 1 : 2;
3631
3632 //4:
3633 srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR).
3634 mr(addr, haystack);
3635 beq(CCR0, L_FinalCheck);
3636 mtctr(tmp2); // Move to count register.
3637 //8:
3638 bind(L_InnerLoop); // Main work horse (2x unrolled search loop).
3639 if (!is_byte) {
3640 lhz(ch1, 0, addr);
3641 lhz(ch2, 2, addr);
3642 } else {
3643 lbz(ch1, 0, addr);
3644 lbz(ch2, 1, addr);
3645 }
     // Register compare if the needle lives in a register, immediate compare otherwise.
3646 (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
3647 (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
3648 beq(CCR0, L_Found1); // Did we find the needle?
3649 beq(CCR1, L_Found2);
3650 addi(addr, addr, 2 * h_csize);
3651 bdnz(L_InnerLoop);
3652 //16:
3653 bind(L_FinalCheck);
     // An odd haycnt leaves one character that the unrolled loop did not cover.
3654 andi_(R0, haycnt, 1);
3655 beq(CCR0, L_NotFound);
3656 if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
3657 (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
3658 beq(CCR1, L_Found1);
3659 //21:
3660 bind(L_NotFound);
3661 li(result, -1); // Not found.
3662 b(L_End);
3663
3664 bind(L_Found2);
3665 addi(addr, addr, h_csize);
3666 //24:
3667 bind(L_Found1); // Return index ...
3668 subf(result, haystack, addr); // relative to haystack, ...
3669 if (!is_byte) { srdi(result, result, 1); } // in characters.
3670 bind(L_End);
3671 } // string_indexof_char
3672
3673
// Emits code that checks whether any of the cnt bytes starting at src has its most
// significant bit (0x80) set. Sets result to 1 if such a byte is found, 0 otherwise.
//
// src:         address of the bytes; advanced while scanning (killed).
// cnt:         number of bytes; not modified.
// tmp1, tmp2:  scratch, killed.
//
// Kills R0 (tmp0), CR0 and CTR.
3674 void MacroAssembler::has_negatives(Register src, Register cnt, Register result,
3675 Register tmp1, Register tmp2) {
3676 const Register tmp0 = R0;
3677 assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
3678 Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
3679
3680 // Check if cnt >= 16 (the fast loop consumes 16 bytes per iteration)
     // The mask is built in three steps (lis, ori, rldimi); the first step is issued
     // before the branch, the remaining two only on the fast path.
3681 lis(tmp1, (int)(short)0x8080); // tmp1 = 0x8080808080808080
3682 srwi_(tmp2, cnt, 4);
3683 li(result, 1); // Assume there's a negative byte.
3684 beq(CCR0, Lslow);
3685 ori(tmp1, tmp1, 0x8080);
3686 rldimi(tmp1, tmp1, 32, 0);
3687 mtctr(tmp2);
3688
3689 // 2x unrolled loop
3690 bind(Lfastloop);
3691 ld(tmp2, 0, src);
3692 ld(tmp0, 8, src);
3693
     // OR both doublewords so that one mask test covers all 16 bytes.
3694 orr(tmp0, tmp2, tmp0);
3695
3696 and_(tmp0, tmp0, tmp1);
3697 bne(CCR0, Ldone); // Found negative byte.
3698 addi(src, src, 16);
3699
3700 bdnz(Lfastloop);
3701
3702 bind(Lslow); // Fallback to slow version
     // Remaining bytes: cnt & 15.
3703 rldicl_(tmp0, cnt, 0, 64-4);
3704 beq(CCR0, Lnoneg);
3705 mtctr(tmp0);
3706 bind(Lloop);
3707 lbz(tmp0, 0, src);
3708 addi(src, src, 1);
3709 andi_(tmp0, tmp0, 0x80);
3710 bne(CCR0, Ldone); // Found negative byte.
3711 bdnz(Lloop);
3712 bind(Lnoneg);
3713 li(result, 0);
3714
3715 bind(Ldone);
3716 }
3717
3718
3719 // Intrinsics for non-CompactStrings
3720
3721 // Search for a single jchar in a jchar[].
3722 //
3723 // Assumes that result differs from all other registers.
3724 //
3725 // 'haystack' is the address of a jchar-array.
3726 // 'needle' is either the character to search for or R0.
3727 // 'needleChar' is the character to search for if 'needle' == R0.
3728 // 'haycnt' is the length of the haystack. We assume 'haycnt' >=1.
3729 //
3730 // Preserves haystack, haycnt, needle and kills all other registers.
3731 //
3732 // If needle == R0, we search for the constant needleChar.
3733 void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
3734 Register needle, jchar needleChar,
3735 Register tmp1, Register tmp2) {
3736
3737 assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
3738
3739 Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
3740 Register addr = tmp1,
4146 //14:
4147 if (cntval & 2) {
4148 lwzx(R0, str1_reg, index_reg);
4149 lwzx(tmp2_reg, str2_reg, index_reg);
4150 cmpw(CCR0, R0, tmp2_reg);
4151 bne(CCR0, Ldone_false);
4152 if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
4153 }
4154 if (cntval & 1) {
4155 lhzx(R0, str1_reg, index_reg);
4156 lhzx(tmp2_reg, str2_reg, index_reg);
4157 cmpw(CCR0, R0, tmp2_reg);
4158 bne(CCR0, Ldone_false);
4159 }
4160 // fallthru: true
4161 }
4162 li(result_reg, 1);
4163 bind(Ldone_false);
4164 }
4165
4166 #endif // Compiler2
4167
4168 // Helpers for Intrinsic Emitters
4169 //
4170 // Reverse the byte order of a 32bit value in a register
4171 // src: 0x44556677
4172 // dst: 0x77665544
4173 // Three steps to obtain the result:
4174 // 1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word
4175 // into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared.
4176 // This value initializes dst.
4177 // 2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost
4178 // byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go.
4179 // This value is mask inserted into dst with a [0..23] mask of 1s.
4180 // 3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position.
4181 // This value is mask inserted into dst with a [8..15] mask of 1s.
4182 void MacroAssembler::load_reverse_32(Register dst, Register src) {
4183 assert_different_registers(dst, src);
4184
4185 rldicl(dst, src, (4+1)*8, 56); // Rotate byte 4 into position 7 (rightmost), clear all to the left.
4186 rlwimi(dst, src, 3*8, 0, 23); // Insert byte 5 into position 6, 7 into 4, leave pos 7 alone.
4187 rlwimi(dst, src, 1*8, 8, 15); // Insert byte 6 into position 5, leave the rest alone.
|