1405 Label no_reserved_zone_enabling;
1406
1407 ld_ptr(R0, JavaThread::reserved_stack_activation_offset(), R16_thread);
1408 cmpld(CCR0, R1_SP, R0);
1409 blt_predict_taken(CCR0, no_reserved_zone_enabling);
1410
1411 // Enable reserved zone again, throw stack overflow exception.
1412 push_frame_reg_args(0, R0);
1413 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), R16_thread);
1414 pop_frame();
1415 mtlr(return_pc);
1416 load_const_optimized(R0, StubRoutines::throw_delayed_StackOverflowError_entry());
1417 mtctr(R0);
1418 bctr();
1419
1420 should_not_reach_here();
1421
1422 bind(no_reserved_zone_enabling);
1423 }
1424
1425 // CmpxchgX sets condition register to cmpX(current, compare).
1426 void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value,
1427 Register compare_value, Register exchange_value,
1428 Register addr_base, int semantics, bool cmpxchgx_hint,
1429 Register int_flag_success, bool contention_hint, bool weak) {
1430 Label retry;
1431 Label failed;
1432 Label done;
1433
1434 // Save one branch if result is returned via register and
1435 // result register is different from the other ones.
1436 bool use_result_reg = (int_flag_success != noreg);
1437 bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
1438 int_flag_success != exchange_value && int_flag_success != addr_base);
1439 assert(!weak || flag == CCR0, "weak only supported with CCR0");
1440
1441 if (use_result_reg && preset_result_reg) {
1442 li(int_flag_success, 0); // preset (assume cas failed)
1443 }
1444
1445 // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
1446 if (contention_hint) { // Don't try to reserve if cmp fails.
1447 lwz(dest_current_value, 0, addr_base);
1448 cmpw(flag, dest_current_value, compare_value);
1449 bne(flag, failed);
1450 }
1451
1452 // release/fence semantics
1453 if (semantics & MemBarRel) {
1454 release();
1455 }
1456
1457 // atomic emulation loop
1458 bind(retry);
1459
1460 lwarx(dest_current_value, addr_base, cmpxchgx_hint);
1461 cmpw(flag, dest_current_value, compare_value);
1462 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1463 bne_predict_not_taken(flag, failed);
1464 } else {
1465 bne( flag, failed);
1466 }
1467 // branch to done => (flag == ne), (dest_current_value != compare_value)
1468 // fall through => (flag == eq), (dest_current_value == compare_value)
1469
1470 stwcx_(exchange_value, addr_base);
1471 if (!weak || use_result_reg) {
1472 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1473 bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
1474 } else {
1475 bne( CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
1476 }
1477 }
1478 // fall through => (flag == eq), (dest_current_value == compare_value), (swapped)
1479
1480 // Result in register (must do this at the end because int_flag_success can be the
1481 // same register as one above).
1482 if (use_result_reg) {
1483 li(int_flag_success, 1);
1484 }
1485
1486 if (semantics & MemBarFenceAfter) {
1487 fence();
1488 } else if (semantics & MemBarAcq) {
1489 isync();
1490 }
3732 and_(tmp0, tmp0, tmp1);
3733 bne(CCR0, Ldone); // Found negative byte.
3734 addi(src, src, 16);
3735
3736 bdnz(Lfastloop);
3737
3738 bind(Lslow); // Fallback to slow version
3739 rldicl_(tmp0, cnt, 0, 64-4);
3740 beq(CCR0, Lnoneg);
3741 mtctr(tmp0);
3742 bind(Lloop);
3743 lbz(tmp0, 0, src);
3744 addi(src, src, 1);
3745 andi_(tmp0, tmp0, 0x80);
3746 bne(CCR0, Ldone); // Found negative byte.
3747 bdnz(Lloop);
3748 bind(Lnoneg);
3749 li(result, 0);
3750
3751 bind(Ldone);
3752 }
3753
3754
3755 // Intrinsics for non-CompactStrings
3756
3757 // Search for a single jchar in a jchar[].
3758 //
3759 // Assumes that result differs from all other registers.
3760 //
3761 // 'haystack' is the address of a jchar-array.
3762 // 'needle' is either the character to search for or R0.
3763 // 'needleChar' is the character to search for if 'needle' == R0.
3764 // 'haycnt' is the length of the haystack. We assume 'haycnt' >=1.
3765 //
3766 // Preserves haystack, haycnt, needle and kills all other registers.
3767 //
3768 // If needle == R0, we search for the constant needleChar.
//
// 'result' receives the character index of the first match, or -1 if there is none.
// Besides tmp1 and tmp2 the emitted code writes R0, CCR0, CCR1 and CTR.
3769 void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
3770 Register needle, jchar needleChar,
3771 Register tmp1, Register tmp2) {
3772
3773 assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
3774
3775 Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
// addr walks through the haystack; ch1/ch2 hold the two characters examined per iteration.
3776 Register addr = tmp1,
3777 ch1 = tmp2,
3778 ch2 = R0;
3779
3780 //3:
3781 dcbtct(haystack, 0x00); // Indicate R/O access to haystack.
3782
// haycnt/2 iterations of the unrolled loop; a zero count skips directly to the final check.
3783 srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR).
3784 mr(addr, haystack);
3785 beq(CCR0, L_FinalCheck);
3786 mtctr(tmp2); // Move to count register.
3787 //8:
3788 bind(L_InnerLoop); // Main work horse (2x unrolled search loop).
3789 lhz(ch1, 0, addr); // Load characters from haystack.
3790 lhz(ch2, 2, addr);
// The C++ conditional chooses at code-generation time between a compare against the
// needle register and a compare against the immediate needleChar.
3791 (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, needleChar);
3792 (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, needleChar);
3793 beq(CCR0, L_Found1); // Did we find the needle?
3794 beq(CCR1, L_Found2);
3795 addi(addr, addr, 4);
3796 bdnz(L_InnerLoop);
3797 //16:
3798 bind(L_FinalCheck);
// An odd haycnt leaves one character that the unrolled loop did not cover.
3799 andi_(R0, haycnt, 1);
3800 beq(CCR0, L_NotFound);
3801 lhz(ch1, 0, addr); // One position left at which we have to compare.
3802 (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, needleChar);
3803 beq(CCR1, L_Found3);
3804 //21:
3805 bind(L_NotFound);
3806 li(result, -1); // Not found.
3807 b(L_End);
3808
// The second character of the pair matched: step addr forward by one jchar first.
3809 bind(L_Found2);
3810 addi(addr, addr, 2);
3811 //24:
3812 bind(L_Found1);
3813 bind(L_Found3); // Return index ...
3814 subf(addr, haystack, addr); // relative to haystack,
3815 srdi(result, addr, 1); // in characters.
3816 bind(L_End);
3817 }
3818
3819
3820 // Implementation of IndexOf for jchar arrays.
3821 //
3822 // The length of haystack and needle are not constant, i.e. passed in a register.
3823 //
3824 // Preserves registers haystack, needle.
3825 // Kills registers haycnt, needlecnt.
3826 // Assumes that result differs from all other registers.
3827 // Haystack, needle are the addresses of jchar-arrays.
3828 // Haycnt, needlecnt are the lengths of them, respectively.
3829 //
3830 // Needlecntval must be zero or 15-bit unsigned immediate and > 1.
//
// 'result' receives the character index of the first match, or -1 if there is none.
// needlecntval == 0 selects the variable-length path (needle length is taken from 'needlecnt');
// any other value is used as the needle length known at code-generation time.
// Besides tmp1..tmp4 the emitted code writes R0, CCR0, CCR1, CTR and, on the variable path, CCR6.
// NOTE(review): 'needle_values' is not referenced anywhere in this body.
3831 void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
3832 Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
3833 Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
3834
3835 // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
3836 Label L_TooShort, L_Found, L_NotFound, L_End;
3837 Register last_addr = haycnt, // Kill haycnt at the beginning.
3838 addr = tmp1,
3839 n_start = tmp2,
3840 ch1 = tmp3,
3841 ch2 = R0;
3842
3843 // **************************************************************************************************
3844 // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
3845 // **************************************************************************************************
3846
3847 //1 (variable) or 3 (const):
3848 dcbtct(needle, 0x00); // Indicate R/O access to str1.
3849 dcbtct(haystack, 0x00); // Indicate R/O access to str2.
3850
3851 // Compute last haystack addr to use if no match gets found.
3852 if (needlecntval == 0) { // variable needlecnt
3853 //3:
3854 subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt.
3855 addi(addr, haystack, -2); // Accesses use pre-increment.
// CCR6 keeps the result of this comparison; it is tested again at L_Comp1/2/3 below
// to detect a needle of exactly 2 characters.
3856 cmpwi(CCR6, needlecnt, 2);
3857 blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately.
3858 slwi(ch1, ch1, 1); // Scale to number of bytes.
3859 lwz(n_start, 0, needle); // Load first 2 characters of needle.
3860 add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
3861 addi(needlecnt, needlecnt, -2); // Rest of needle.
3862 } else { // constant needlecnt
3863 guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
3864 assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
3865 //5:
3866 addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
3867 lwz(n_start, 0, needle); // Load first 2 characters of needle.
3868 addi(addr, haystack, -2); // Accesses use pre-increment.
3869 slwi(ch1, ch1, 1); // Scale to number of bytes.
3870 add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
3871 li(needlecnt, needlecntval-2); // Rest of needle.
3872 }
3873
3874 // Main Loop (now we have at least 3 characters).
3875 //11:
3876 Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3;
// L_OuterLoop is also the target when the rest-of-needle comparison below fails; addr then
// still points at the rejected candidate and is advanced by one character here.
3877 bind(L_OuterLoop); // Search for 1st 2 characters.
// addr_diff shares tmp4 with ind_reg (used in the rest-of-needle loop); it is recomputed
// on every pass through L_OuterLoop.
3878 Register addr_diff = tmp4;
3879 subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
3880 addi(addr, addr, 2); // This is the new address we want to use for comparing.
3881 srdi_(ch2, addr_diff, 2);
3882 beq(CCR0, L_FinalCheck); // 2 characters left?
3883 mtctr(ch2); // addr_diff/4
3884 //16:
3885 bind(L_InnerLoop); // Main work horse (2x unrolled search loop)
// Each lwz fetches two adjacent characters as one 32-bit word; the two words overlap by
// one character (offsets 0 and 2).
3886 lwz(ch1, 0, addr); // Load 2 characters of haystack (ignore alignment).
3887 lwz(ch2, 2, addr);
3888 cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
3889 cmpw(CCR1, ch2, n_start);
3890 beq(CCR0, L_Comp1); // Did we find the needle start?
3891 beq(CCR1, L_Comp2);
3892 addi(addr, addr, 4);
3893 bdnz(L_InnerLoop);
3894 //24:
3895 bind(L_FinalCheck);
3896 rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff>>1)&1.
3897 beq(CCR0, L_NotFound);
3898 lwz(ch1, 0, addr); // One position left at which we have to compare.
3899 cmpw(CCR1, ch1, n_start);
3900 beq(CCR1, L_Comp3);
3901 //29:
3902 bind(L_NotFound);
3903 li(result, -1); // not found
3904 b(L_End);
3905
3906
3907 // **************************************************************************************************
3908 // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
3909 // **************************************************************************************************
3910 //31:
3911 if ((needlecntval>>1) !=1 ) { // Const needlecnt is 2 or 3? Reduce code size.
3912 int nopcnt = 5;
3913 if (needlecntval !=0 ) ++nopcnt; // Balance alignment (other case: see below).
3914 if (needlecntval == 0) { // We have to handle these cases separately.
3915 Label L_OneCharLoop;
// L_TooShort is reached from the setup code before last_addr (the same register as haycnt)
// has been written, so haycnt still holds the haystack length here.
3916 bind(L_TooShort);
3917 mtctr(haycnt);
3918 lhz(n_start, 0, needle); // First character of needle
3919 bind(L_OneCharLoop);
// lhzu advances addr by 2 before loading (addr was initialized to haystack-2).
3920 lhzu(ch1, 2, addr);
3921 cmpw(CCR1, ch1, n_start);
3922 beq(CCR1, L_Found); // Did we find the one character needle?
3923 bdnz(L_OneCharLoop);
3924 li(result, -1); // Not found.
3925 b(L_End);
3926 } // 8 instructions, so no impact on alignment.
3927 for (int x = 0; x < nopcnt; ++x) nop();
3928 }
3929
3930 // **************************************************************************************************
3931 // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
3932 // **************************************************************************************************
3933
3934 // Compare the rest
3935 //36 if needlecntval==0, else 37:
3936 bind(L_Comp2);
3937 addi(addr, addr, 2); // First comparison has failed, 2nd one hit.
3938 bind(L_Comp1); // Addr points to possible needle start.
3939 bind(L_Comp3); // Could have created a copy and use a different return address but saving code size here.
// For a constant needle length of 2 nothing is emitted here: matching the first two
// characters already is a full match. Length 3 gets a single straight-line compare.
3940 if (needlecntval != 2) { // Const needlecnt==2?
3941 if (needlecntval != 3) {
3942 if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt==2?
3943 Register ind_reg = tmp4;
3944 li(ind_reg, 2*2); // First 2 characters are already compared, use index 2.
3945 mtctr(needlecnt); // Decremented by 2, still > 0.
3946 //40:
3947 Label L_CompLoop;
3948 bind(L_CompLoop);
3949 lhzx(ch2, needle, ind_reg);
3950 lhzx(ch1, addr, ind_reg);
3951 cmpw(CCR1, ch1, ch2);
3952 bne(CCR1, L_OuterLoop);
3953 addi(ind_reg, ind_reg, 2);
3954 bdnz(L_CompLoop);
3955 } else { // No loop required if there's only one needle character left.
3956 lhz(ch2, 2*2, needle);
3957 lhz(ch1, 2*2, addr);
3958 cmpw(CCR1, ch1, ch2);
3959 bne(CCR1, L_OuterLoop);
3960 }
3961 }
3962 // Return index ...
3963 //46:
3964 bind(L_Found);
3965 subf(addr, haystack, addr); // relative to haystack, ...
3966 srdi(result, addr, 1); // in characters.
3967 //48:
3968 bind(L_End);
3969 }
3970
3971 // Implementation of Compare for jchar arrays.
3972 //
3973 // Kills the registers str1, str2, cnt1, cnt2.
3974 // Kills cr0, ctr.
3975 // Assumes that result differs from the input registers.
//
// result_reg receives the difference between the first pair of mismatching characters or,
// if no mismatch is found within min(cnt1, cnt2) characters (or str1 == str2), cnt1 - cnt2
// (counts taken after the optional halving below). R0 is used as an additional temporary.
3976 void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
3977 Register result_reg, Register tmp_reg) {
3978 assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg);
3979
3980 Label Ldone, Lslow_case, Lslow_loop, Lfast_loop;
// Several inputs are recycled as temporaries, which is why they are documented as killed.
3981 Register cnt_diff = R0,
3982 limit_reg = cnt1_reg,
3983 chr1_reg = result_reg,
3984 chr2_reg = cnt2_reg,
3985 addr_diff = str2_reg;
3986
3987 // 'cnt_reg' contains the number of characters in the string's character array for the
3988 // pre-CompactStrings strings implementation and the number of bytes in the string's
3989 // byte array for the CompactStrings strings implementation.
3990 const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array
3991
3992 // Offset 0 should be 32 byte aligned.
3993 //-6:
// Convert byte counts to character counts when the counts are given in bytes (shift by 0 otherwise).
3994 srawi(cnt1_reg, cnt1_reg, HAS_COMPACT_STRING);
3995 srawi(cnt2_reg, cnt2_reg, HAS_COMPACT_STRING);
3996 //-4:
3997 dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
3998 dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
3999 //-2:
4000 // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters).
4001 subf(result_reg, cnt2_reg, cnt1_reg); // difference between cnt1/2
// addr_diff = str2 - str1. From here on str2 is addressed as str1 + addr_diff, so only
// str1_reg has to be advanced in the loops.
4002 subf_(addr_diff, str1_reg, str2_reg); // alias?
4003 beq(CCR0, Ldone); // return cnt difference if both ones are identical
4004 srawi(limit_reg, result_reg, 31); // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow)
4005 mr(cnt_diff, result_reg);
4006 andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1<cnt2 ? cnt1-cnt2 : 0
4007 add_(limit_reg, cnt2_reg, limit_reg); // min(cnt1, cnt2)==0?
4008 beq(CCR0, Ldone); // return cnt difference if one has 0 length
4009
4010 lhz(chr1_reg, 0, str1_reg); // optional: early out if first characters mismatch
4011 lhzx(chr2_reg, str1_reg, addr_diff); // optional: early out if first characters mismatch
4012 addi(tmp_reg, limit_reg, -1); // min(cnt1, cnt2)-1
4013 subf_(result_reg, chr2_reg, chr1_reg); // optional: early out if first characters mismatch
4014 bne(CCR0, Ldone); // optional: early out if first characters mismatch
4015
4016 // Set loop counter by scaling down tmp_reg
4017 srawi_(chr2_reg, tmp_reg, exact_log2(4)); // (min(cnt1, cnt2)-1)/4
4018 ble(CCR0, Lslow_case); // need >4 characters for fast loop
4019 andi(limit_reg, tmp_reg, 4-1); // remaining characters
4020
4021 // Adapt str1_reg str2_reg for the first loop iteration
4022 mtctr(chr2_reg); // (min(cnt1, cnt2)-1)/4
4023 addi(limit_reg, limit_reg, 4+1); // compare last 5-8 characters in slow_case if mismatch found in fast_loop
4024 //16:
4025 // Compare the rest of the characters
// Fast loop: compares 4 characters (one 8-byte load per string) per iteration. On a mismatch
// str1_reg is left at the start of the differing group and the slow loop locates the character.
4026 bind(Lfast_loop);
4027 ld(chr1_reg, 0, str1_reg);
4028 ldx(chr2_reg, str1_reg, addr_diff);
4029 cmpd(CCR0, chr2_reg, chr1_reg);
4030 bne(CCR0, Lslow_case); // return chr1_reg
4031 addi(str1_reg, str1_reg, 4*2);
4032 bdnz(Lfast_loop);
4033 addi(limit_reg, limit_reg, -4); // no mismatch found in fast_loop, only 1-4 characters missing
4034 //23:
4035 bind(Lslow_case);
4036 mtctr(limit_reg);
4037 //24:
// Slow loop: one character per iteration; the subtraction result doubles as the return value.
4038 bind(Lslow_loop);
4039 lhz(chr1_reg, 0, str1_reg);
4040 lhzx(chr2_reg, str1_reg, addr_diff);
4041 subf_(result_reg, chr2_reg, chr1_reg);
4042 bne(CCR0, Ldone); // return chr1_reg
4043 addi(str1_reg, str1_reg, 1*2);
4044 bdnz(Lslow_loop);
4045 //30:
4046 // If strings are equal up to min length, return the length difference.
4047 mr(result_reg, cnt_diff);
4048 nop(); // alignment
4049 //32:
4050 // Otherwise, return the difference between the first mismatched chars.
4051 bind(Ldone);
4052 }
4053
4054
4055 // Compare char[] arrays.
4056 //
4057 // str1_reg USE only
4058 // str2_reg USE only
4059 // cnt_reg USE_DEF, due to tmp reg shortage
4060 // result_reg DEF only, might compromise USE only registers
//
// result_reg is set to 1 if all compared characters are equal and to 0 otherwise.
// The emitted code also writes tmp1_reg, tmp2_reg, tmp4_reg, tmp5_reg, CCR0, CCR1 and CTR.
// NOTE(review): tmp3_reg only appears in the register-distinctness asserts of this body.
4061 void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
4062 Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
4063 Register tmp5_reg) {
4064
4065 // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
4066 assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
4067 assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
4068
4069 // Offset 0 should be 32 byte aligned.
4070 Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false;
// index_reg is the running byte offset applied to both arrays; cbc_iter counts the
// characters left for the character-by-character loop (Lcbc).
4071 Register index_reg = tmp5_reg;
4072 Register cbc_iter = tmp4_reg;
4073
4074 // 'cnt_reg' contains the number of characters in the string's character array for the
4075 // pre-CompactStrings strings implementation and the number of bytes in the string's
4076 // byte array for the CompactStrings strings implementation.
4077 const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array
4078
4079 //-1:
4080 dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
4081 dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
4082 //1:
4083 // cbc_iter: remaining characters after the '4 java characters per iteration' loop.
4084 rlwinm(cbc_iter, cnt_reg, 32 - HAS_COMPACT_STRING, 30, 31); // (cnt_reg % (HAS_COMPACT_STRING ? 8 : 4)) >> HAS_COMPACT_STRING
4085 li(index_reg, 0); // init
4086 li(result_reg, 0); // assume false
4087 // tmp2_reg: units of 4 java characters (i.e. 8 bytes) per iteration (main loop).
4088 srwi_(tmp2_reg, cnt_reg, exact_log2(4 << HAS_COMPACT_STRING)); // cnt_reg / (HAS_COMPACT_STRING ? 8 : 4)
4089
// The cmpwi below writes CCR1 only, so the following beq still tests CCR0 as left by
// srwi_ above (i.e. whether the main loop has zero iterations).
4090 cmpwi(CCR1, cbc_iter, 0); // CCR1 = (cbc_iter==0)
4091 beq(CCR0, Linit_cbc); // too short
4092 mtctr(tmp2_reg);
4093 //8:
4094 bind(Lloop);
4095 ldx(tmp1_reg, str1_reg, index_reg);
4096 ldx(tmp2_reg, str2_reg, index_reg);
4097 cmpd(CCR0, tmp1_reg, tmp2_reg);
4098 bne(CCR0, Ldone_false); // Unequal char pair found -> done.
4099 addi(index_reg, index_reg, 4*sizeof(jchar));
4100 bdnz(Lloop);
4101 //14:
4102 bind(Linit_cbc);
// CCR1 was set before the main loop and is not written inside it.
4103 beq(CCR1, Ldone_true);
4104 mtctr(cbc_iter);
4105 //16:
4106 bind(Lcbc);
4107 lhzx(tmp1_reg, str1_reg, index_reg);
4108 lhzx(tmp2_reg, str2_reg, index_reg);
4109 cmpw(CCR0, tmp1_reg, tmp2_reg);
4110 bne(CCR0, Ldone_false); // Unequal char pair found -> done.
4111 addi(index_reg, index_reg, 1*sizeof(jchar));
4112 bdnz(Lcbc);
4113 nop();
4114 bind(Ldone_true);
4115 li(result_reg, 1);
4116 //24:
// Branches to Ldone_false skip the li above, leaving the preset 0 in result_reg.
4117 bind(Ldone_false);
4118 }
4119
4120
// Compare char[] arrays whose length is known at code-generation time.
//
// str1_reg, str2_reg: addresses of the two arrays (may be the same register).
// cntval: length as a non-negative 15-bit immediate; halved below when strings carry a coder field.
// result_reg: set to 1 if all compared characters are equal and to 0 otherwise.
// tmp1_reg, tmp2_reg: temporaries; the loop variant additionally uses R0 and CTR. CCR0 is written.
//
// For fewer than 16 characters the comparison is emitted fully unrolled with immediate
// offsets; otherwise a loop comparing 4 characters per iteration is emitted, followed by
// straight-line code for the remaining 2 and/or 1 characters.
4121 void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
4122 Register tmp1_reg, Register tmp2_reg) {
4123 // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
4124 assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg);
4125 assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg);
4126 assert(sizeof(jchar) == 2, "must be");
4127 assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate");
4128
4129 // 'cntval' contains the number of characters in the string's character array for the
4130 // pre-CompactStrings strings implementation and the number of bytes in the string's
4131 // byte array for the CompactStrings strings implementation.
4132 cntval >>= (java_lang_String::has_coder_field() ? 1 : 0); // '1' = byte array strings, '0' = char array strings
4133
4134 Label Ldone_false;
4135
4136 if (cntval < 16) { // short case
// With cntval == 0 no compare is emitted at all, so the preset is unnecessary.
4137 if (cntval != 0) li(result_reg, 0); // assume false
4138
4139 const int num_bytes = cntval*sizeof(jchar);
4140 int index = 0;
// One 8-byte compare per complete group of 4 characters; 'index' is a code-generation-time
// byte offset that ends up pointing just behind the last complete group.
4141 for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) {
4142 ld(tmp1_reg, index, str1_reg);
4143 ld(tmp2_reg, index, str2_reg);
4144 cmpd(CCR0, tmp1_reg, tmp2_reg);
4145 bne(CCR0, Ldone_false);
4146 }
// Remaining 2 characters: one 4-byte compare.
4147 if (cntval & 2) {
4148 lwz(tmp1_reg, index, str1_reg);
4149 lwz(tmp2_reg, index, str2_reg);
4150 cmpw(CCR0, tmp1_reg, tmp2_reg);
4151 bne(CCR0, Ldone_false);
4152 index += 4;
4153 }
// Remaining single character: one 2-byte compare.
4154 if (cntval & 1) {
4155 lhz(tmp1_reg, index, str1_reg);
4156 lhz(tmp2_reg, index, str2_reg);
4157 cmpw(CCR0, tmp1_reg, tmp2_reg);
4158 bne(CCR0, Ldone_false);
4159 }
4160 // fallthrough: true
4161 } else {
4162 Label Lloop;
// tmp1_reg serves as the running byte offset here, so R0 and tmp2_reg hold the loaded data.
4163 Register index_reg = tmp1_reg;
4164 const int loopcnt = cntval/4;
4165 assert(loopcnt > 0, "must be");
4166 // Offset 0 should be 32 byte aligned.
4167 //2:
4168 dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
4169 dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
4170 li(tmp2_reg, loopcnt);
4171 li(index_reg, 0); // init
4172 li(result_reg, 0); // assume false
4173 mtctr(tmp2_reg);
4174 //8:
4175 bind(Lloop);
4176 ldx(R0, str1_reg, index_reg);
4177 ldx(tmp2_reg, str2_reg, index_reg);
4178 cmpd(CCR0, R0, tmp2_reg);
4179 bne(CCR0, Ldone_false); // Unequal char pair found -> done.
4180 addi(index_reg, index_reg, 4*sizeof(jchar));
4181 bdnz(Lloop);
4182 //14:
4183 if (cntval & 2) {
4184 lwzx(R0, str1_reg, index_reg);
4185 lwzx(tmp2_reg, str2_reg, index_reg);
4186 cmpw(CCR0, R0, tmp2_reg);
4187 bne(CCR0, Ldone_false);
// The offset only needs to advance if one more character is compared afterwards.
4188 if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
4189 }
4190 if (cntval & 1) {
4191 lhzx(R0, str1_reg, index_reg);
4192 lhzx(tmp2_reg, str2_reg, index_reg);
4193 cmpw(CCR0, R0, tmp2_reg);
4194 bne(CCR0, Ldone_false);
4195 }
4196 // fallthru: true
4197 }
4198 li(result_reg, 1);
4199 bind(Ldone_false);
4200 }
4201
4202 #endif // Compiler2
4203
4204 // Helpers for Intrinsic Emitters
4205 //
4206 // Revert the byte order of a 32bit value in a register
4207 // src: 0x44556677
4208 // dst: 0x77665544
4209 // Three steps to obtain the result:
4210 // 1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word
4211 // into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared.
4212 // This value initializes dst.
4213 // 2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost
4214 // byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go.
4215 // This value is mask inserted into dst with a [0..23] mask of 1s.
4216 // 3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position.
4217 // This value is mask inserted into dst with a [8..15] mask of 1s.
4218 void MacroAssembler::load_reverse_32(Register dst, Register src) {
4219 assert_different_registers(dst, src);
|
1405 Label no_reserved_zone_enabling;
1406
1407 ld_ptr(R0, JavaThread::reserved_stack_activation_offset(), R16_thread);
1408 cmpld(CCR0, R1_SP, R0);
1409 blt_predict_taken(CCR0, no_reserved_zone_enabling);
1410
1411 // Enable reserved zone again, throw stack overflow exception.
1412 push_frame_reg_args(0, R0);
1413 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), R16_thread);
1414 pop_frame();
1415 mtlr(return_pc);
1416 load_const_optimized(R0, StubRoutines::throw_delayed_StackOverflowError_entry());
1417 mtctr(R0);
1418 bctr();
1419
1420 should_not_reach_here();
1421
1422 bind(no_reserved_zone_enabling);
1423 }
1424
// Emits a 64-bit atomic exchange: a ldarx/stdcx_ loop that stores exchange_value at addr_base
// and repeats until the store-conditional succeeds. dest_current_value receives the value
// loaded by the ldarx of the final iteration. cmpxchgx_hint is passed through to ldarx.
// CCR0 is written by the store-conditional. No memory barriers are emitted here.
// NOTE(review): nothing in this body asserts that dest_current_value differs from
// exchange_value and addr_base — confirm that callers guarantee it.
1425 void MacroAssembler::getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
1426 bool cmpxchgx_hint) {
1427 Label retry;
1428 bind(retry);
1429 ldarx(dest_current_value, addr_base, cmpxchgx_hint);
1430 stdcx_(exchange_value, addr_base);
// The flag only chooses which branch variant is emitted; both target 'retry' on CCR0 'ne'.
1431 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1432 bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
1433 } else {
1434 bne( CCR0, retry); // StXcx_ sets CCR0.
1435 }
1436 }
1437
// Emits a 64-bit atomic fetch-and-add: a ldarx/stdcx_ loop that stores
// dest_current_value + inc_value at addr_base and repeats until the store-conditional succeeds.
// dest_current_value receives the value loaded before the addition; tmp is overwritten with the sum.
// cmpxchgx_hint is passed through to ldarx. CCR0 is written by the store-conditional.
// No memory barriers are emitted here.
// NOTE(review): nothing in this body asserts that tmp and dest_current_value differ from each
// other and from inc_value/addr_base — confirm that callers guarantee it.
1438 void MacroAssembler::getandaddd(Register dest_current_value, Register inc_value, Register addr_base,
1439 Register tmp, bool cmpxchgx_hint) {
1440 Label retry;
1441 bind(retry);
1442 ldarx(dest_current_value, addr_base, cmpxchgx_hint);
1443 add(tmp, dest_current_value, inc_value);
1444 stdcx_(tmp, addr_base);
// The flag only chooses which branch variant is emitted; both target 'retry' on CCR0 'ne'.
1445 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1446 bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
1447 } else {
1448 bne( CCR0, retry); // StXcx_ sets CCR0.
1449 }
1450 }
1451
1452 // Word/sub-word atomic helper functions
1453
1454 // Temps and addr_base are killed if size < 4 and processor does not support respective instructions.
1455 // Atomic add always kills tmp1.
//
// Emits a load-reserve/store-conditional retry loop that atomically either replaces the value
// at addr_base with exchange_value (is_add == false) or adds exchange_value to it (is_add == true).
// dest_current_value receives the value loaded before the update, sign-extended for size 1 and 2.
// size is the operand width in bytes (1, 2 or 4). cmpxchgx_hint is passed through to the
// load-reserve instruction. CCR0 is written by the store-conditional.
// No memory barriers are emitted here.
1456 void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
1457 Register addr_base, Register tmp1, Register tmp2, Register tmp3,
1458 bool cmpxchgx_hint, bool is_add, int size) {
// If the sub-word instructions are not available, fall back to a 4-byte access
// (instruction_type != size then marks the emulation path below).
1459 int instruction_type = VM_Version::has_lqarx() ? size : 4; // Sub-word instructions available since Power 8.
1460
1461 Label retry;
// Defaults for the direct path: the loaded value goes straight into dest_current_value and
// the value to store is either the sum (in tmp1) or exchange_value itself.
1462 Register shift_amount = noreg,
1463 val32 = dest_current_value,
1464 modval = is_add ? tmp1 : exchange_value;
1465
1466 if (instruction_type != size) {
1467 assert_different_registers(tmp1, tmp2, tmp3, dest_current_value, exchange_value, addr_base);
1468 modval = tmp1;
1469 shift_amount = tmp2;
1470 val32 = tmp3;
1471 // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned.
1472 #ifdef VM_LITTLE_ENDIAN
1473 rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8;
1474 clrrdi(addr_base, addr_base, 2);
1475 #else
1476 xori(shift_amount, addr_base, (size == 1) ? 3 : 2);
1477 clrrdi(addr_base, addr_base, 2);
1478 rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16;
1479 #endif
1480 }
1481
1482 // atomic emulation loop
1483 bind(retry);
1484
1485 switch (instruction_type) {
1486 case 4: lwarx(val32, addr_base, cmpxchgx_hint); break;
1487 case 2: lharx(val32, addr_base, cmpxchgx_hint); break;
1488 case 1: lbarx(val32, addr_base, cmpxchgx_hint); break;
1489 default: ShouldNotReachHere();
1490 }
1491
// Emulation path: move the addressed byte/short down to the low end of dest_current_value.
1492 if (instruction_type != size) {
1493 srw(dest_current_value, val32, shift_amount);
1494 }
1495
1496 if (is_add) { add(modval, dest_current_value, exchange_value); }
1497
1498 if (instruction_type != size) {
1499 // Transform exchange value such that the replacement can be done by one xor instruction
// modval = ((old ^ new) & field mask) << shift; xor-ing that into the loaded word swaps
// the old field for the new one and leaves the neighbouring bytes unchanged.
1500 xorr(modval, dest_current_value, is_add ? modval : exchange_value);
1501 clrldi(modval, modval, (size == 1) ? 56 : 48);
1502 slw(modval, modval, shift_amount);
1503 xorr(modval, val32, modval);
1504 }
1505
1506 switch (instruction_type) {
1507 case 4: stwcx_(modval, addr_base); break;
1508 case 2: sthcx_(modval, addr_base); break;
1509 case 1: stbcx_(modval, addr_base); break;
1510 default: ShouldNotReachHere();
1511 }
1512
1513 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1514 bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
1515 } else {
1516 bne( CCR0, retry); // StXcx_ sets CCR0.
1517 }
1518
// Sign-extend sub-word results; emitted after the loop, so it is executed once.
1519 if (size == 1) {
1520 extsb(dest_current_value, dest_current_value);
1521 } else if (size == 2) {
1522 extsh(dest_current_value, dest_current_value);
1523 };
1524 }
1525
1526 // Temps, addr_base and exchange_value are killed if size < 4 and processor does not support respective instructions.
//
// Emits the core of a compare-and-exchange of 'size' bytes (1, 2 or 4): binds 'retry',
// load-reserves the current value into dest_current_value (sign-extended for size 1 and 2),
// compares it with compare_value into 'flag', branches to 'failed' on mismatch and finally
// issues the store-conditional of exchange_value.
// The branch that evaluates the store-conditional result is not emitted here; this body
// ends directly after the store-conditional instruction.
1527 void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
1528 Register compare_value, Register exchange_value,
1529 Register addr_base, Register tmp1, Register tmp2,
1530 Label &retry, Label &failed, bool cmpxchgx_hint, int size) {
// If the sub-word instructions are not available, fall back to a 4-byte access
// (instruction_type != size then marks the emulation path below).
1531 int instruction_type = VM_Version::has_lqarx() ? size : 4; // Sub-word instructions available since Power 8.
1532
1533 Register shift_amount = noreg,
1534 val32 = dest_current_value,
1535 modval = exchange_value;
1536
1537 if (instruction_type != size) {
1538 assert_different_registers(tmp1, tmp2, dest_current_value, compare_value, exchange_value, addr_base);
1539 shift_amount = tmp1;
// val32 and modval share tmp2: the loaded word is turned into the word to store in place.
1540 val32 = tmp2;
1541 modval = tmp2;
1542 // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned.
1543 #ifdef VM_LITTLE_ENDIAN
1544 rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8;
1545 clrrdi(addr_base, addr_base, 2);
1546 #else
1547 xori(shift_amount, addr_base, (size == 1) ? 3 : 2);
1548 clrrdi(addr_base, addr_base, 2);
1549 rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16;
1550 #endif
1551 // Transform exchange value such that the replacement can be done by one xor instruction.
// This is done once, ahead of 'retry': exchange_value becomes
// ((compare ^ exchange) & field mask) << shift. The xor applied after a successful compare
// (where the loaded field equals compare_value) then yields the word with the new field.
1552 xorr(exchange_value, compare_value, exchange_value);
1553 clrldi(exchange_value, exchange_value, (size == 1) ? 56 : 48);
1554 slw(exchange_value, exchange_value, shift_amount);
1555 }
1556
1557 // atomic emulation loop
1558 bind(retry);
1559
1560 switch (instruction_type) {
1561 case 4: lwarx(val32, addr_base, cmpxchgx_hint); break;
1562 case 2: lharx(val32, addr_base, cmpxchgx_hint); break;
1563 case 1: lbarx(val32, addr_base, cmpxchgx_hint); break;
1564 default: ShouldNotReachHere();
1565 }
1566
// Emulation path: move the addressed byte/short down to the low end of dest_current_value.
1567 if (instruction_type != size) {
1568 srw(dest_current_value, val32, shift_amount);
1569 }
// Sign-extend sub-word values before they are compared with compare_value.
1570 if (size == 1) {
1571 extsb(dest_current_value, dest_current_value);
1572 } else if (size == 2) {
1573 extsh(dest_current_value, dest_current_value);
1574 };
1575
1576 cmpw(flag, dest_current_value, compare_value);
1577 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1578 bne_predict_not_taken(flag, failed);
1579 } else {
1580 bne( flag, failed);
1581 }
1582 // branch to failed => (flag == ne), (dest_current_value != compare_value)
1583 // fall through => (flag == eq), (dest_current_value == compare_value)
1584
1585 if (instruction_type != size) {
1586 xorr(modval, val32, exchange_value);
1587 }
1588
1589 switch (instruction_type) {
1590 case 4: stwcx_(modval, addr_base); break;
1591 case 2: sthcx_(modval, addr_base); break;
1592 case 1: stbcx_(modval, addr_base); break;
1593 default: ShouldNotReachHere();
1594 }
1595 }
1596
1597 // CmpxchgX sets condition register to cmpX(current, compare).
1598 void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_current_value,
1599 Register compare_value, Register exchange_value,
1600 Register addr_base, Register tmp1, Register tmp2,
1601 int semantics, bool cmpxchgx_hint,
1602 Register int_flag_success, bool contention_hint, bool weak, int size) {
1603 Label retry;
1604 Label failed;
1605 Label done;
1606
1607 // Save one branch if result is returned via register and
1608 // result register is different from the other ones.
1609 bool use_result_reg = (int_flag_success != noreg);
1610 bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
1611 int_flag_success != exchange_value && int_flag_success != addr_base &&
1612 int_flag_success != tmp1 && int_flag_success != tmp2);
1613 assert(!weak || flag == CCR0, "weak only supported with CCR0");
1614 assert(size == 1 || size == 2 || size == 4, "unsupported");
1615
1616 if (use_result_reg && preset_result_reg) {
1617 li(int_flag_success, 0); // preset (assume cas failed)
1618 }
1619
1620 // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
1621 if (contention_hint) { // Don't try to reserve if cmp fails.
1622 switch (size) {
1623 case 1: lbz(dest_current_value, 0, addr_base); extsb(dest_current_value, dest_current_value); break;
1624 case 2: lha(dest_current_value, 0, addr_base); break;
1625 case 4: lwz(dest_current_value, 0, addr_base); break;
1626 default: ShouldNotReachHere();
1627 }
1628 cmpw(flag, dest_current_value, compare_value);
1629 bne(flag, failed);
1630 }
1631
1632 // release/fence semantics
1633 if (semantics & MemBarRel) {
1634 release();
1635 }
1636
1637 cmpxchg_loop_body(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
1638 retry, failed, cmpxchgx_hint, size);
1639 if (!weak || use_result_reg) {
1640 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1641 bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
1642 } else {
1643 bne( CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
1644 }
1645 }
1646 // fall through => (flag == eq), (dest_current_value == compare_value), (swapped)
1647
1648 // Result in register (must do this at the end because int_flag_success can be the
1649 // same register as one above).
1650 if (use_result_reg) {
1651 li(int_flag_success, 1);
1652 }
1653
1654 if (semantics & MemBarFenceAfter) {
1655 fence();
1656 } else if (semantics & MemBarAcq) {
1657 isync();
1658 }
3900 and_(tmp0, tmp0, tmp1);
3901 bne(CCR0, Ldone); // Found negative byte.
3902 addi(src, src, 16);
3903
3904 bdnz(Lfastloop);
3905
3906 bind(Lslow); // Fallback to slow version
3907 rldicl_(tmp0, cnt, 0, 64-4);
3908 beq(CCR0, Lnoneg);
3909 mtctr(tmp0);
3910 bind(Lloop);
3911 lbz(tmp0, 0, src);
3912 addi(src, src, 1);
3913 andi_(tmp0, tmp0, 0x80);
3914 bne(CCR0, Ldone); // Found negative byte.
3915 bdnz(Lloop);
3916 bind(Lnoneg);
3917 li(result, 0);
3918
3919 bind(Ldone);
3920 }
3921
3922 #endif // Compiler2
3923
3924 // Helpers for Intrinsic Emitters
3925 //
3926 // Revert the byte order of a 32bit value in a register
3927 // src: 0x44556677
3928 // dst: 0x77665544
3929 // Three steps to obtain the result:
3930 // 1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word
3931 // into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared.
3932 // This value initializes dst.
3933 // 2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost
3934 // byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go.
3935 // This value is mask inserted into dst with a [0..23] mask of 1s.
3936 // 3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position.
3937 // This value is mask inserted into dst with a [8..15] mask of 1s.
3938 void MacroAssembler::load_reverse_32(Register dst, Register src) {
3939 assert_different_registers(dst, src);
|