< prev index next >

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Print this page




4009     __ generate_dsin_dcos(isCos, (address)StubRoutines::aarch64::_npio2_hw,
4010         (address)StubRoutines::aarch64::_two_over_pi,
4011         (address)StubRoutines::aarch64::_pio2,
4012         (address)StubRoutines::aarch64::_dsin_coef,
4013         (address)StubRoutines::aarch64::_dcos_coef);
4014     return start;
4015   }
4016 
4017   address generate_dlog() {
         // Generates the "dlog" stub and returns the address of its first
         // instruction. The instruction sequence itself is emitted by the macro
         // assembler's fast_log(); this wrapper only aligns the entry, registers
         // the stub name and fixes the register assignment handed to fast_log().
4018     __ align(CodeEntryAlignment);
4019     StubCodeMark mark(this, "StubRoutines", "dlog");
4020     address entry = __ pc(); // entry point, taken after the alignment padding
         // Vector temporaries v0..v5 and v16..v19, general purpose temporaries
         // r0..r4.
         // NOTE(review): presumably the argument/result live in v0 and all listed
         // registers are clobbered by fast_log() - confirm against fast_log().
4021     FloatRegister vtmp0 = v0, vtmp1 = v1, vtmp2 = v2, vtmp3 = v3, vtmp4 = v4,
4022         vtmp5 = v5, tmpC1 = v16, tmpC2 = v17, tmpC3 = v18, tmpC4 = v19;
4023     Register tmp1 = r0, tmp2 = r1, tmp3 = r2, tmp4 = r3, tmp5 = r4;
4024     __ fast_log(vtmp0, vtmp1, vtmp2, vtmp3, vtmp4, vtmp5, tmpC1, tmpC2, tmpC3,
4025         tmpC4, tmp1, tmp2, tmp3, tmp4, tmp5);
4026     return entry;
4027   }
4028 
4029   // code for comparing 16 bytes of strings with same encoding

















4030   void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
         // Emits code that compares 16 bytes of two strings with the same
         // encoding, software-pipelined in two 8-byte steps: each step loads the
         // next 8 bytes of both strings while comparing the 8 bytes loaded by the
         // previous step.
         //
         // Expected on entry: tmp1 (r10) / tmp2 (r11) hold 8 already loaded,
         // not yet compared bytes of the 1st / 2nd string; str1 (r1) / str2 (r3)
         // point to the next bytes to load.
         // On fall-through: tmp1 / tmp2 hold the next 8 loaded but not yet
         // compared bytes, str1 / str2 are advanced by 16.
         // Branch to DIFF1: the differing data is in tmp1 / tmp2.
         // Branch to DIFF2: the differing data is in rscratch1 / cnt1.
         // In both cases rscratch2 holds the XOR of the differing words.
         // cnt1 (r2) is used purely as a scratch register here.
         // No emitted instruction modifies the condition flags, so callers may
         // set flags before this sequence and branch on them afterwards.
         // (The unused 'result' register alias was removed; the generated code
         // is unchanged.)
4031     Register str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
4032     __ ldr(rscratch1, Address(__ post(str1, 8))); // next 8 bytes of 1st string, str1 += 8
4033     __ eor(rscratch2, tmp1, tmp2);                // compare the previously loaded pair
4034     __ ldr(cnt1, Address(__ post(str2, 8)));      // next 8 bytes of 2nd string, str2 += 8
4035     __ cbnz(rscratch2, DIFF1);
4036     __ ldr(tmp1, Address(__ post(str1, 8)));      // preload for the next step / invocation
4037     __ eor(rscratch2, rscratch1, cnt1);           // compare the pair loaded above
4038     __ ldr(tmp2, Address(__ post(str2, 8)));
4039     __ cbnz(rscratch2, DIFF2);
4040   }
4041 
4042   // code for comparing 16 characters of strings with Latin1 and Utf16 encoding



































4043   void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
4044       Label &DIFF2) {
         // Emits code that compares 16 Latin1 characters with 16 UTF-16
         // characters. The Latin1 characters are loaded with a single 16-byte
         // load and widened to UTF-16 by interleaving with zero bytes; the UTF-16
         // characters are loaded 4 at a time, alternating between tmpU and tmp3.
         //
         // Expected on entry:
         //   tmp2 (r11): address of the next Latin1 characters to load
         //   cnt1 (r2):  address of the next UTF-16 characters to load
         //   tmp3 (r12): 4 UTF-16 characters already loaded from just before cnt1
         //   vtmpZ (v0): all zero
         // Parameters:
         //   tmpL:  receives the widened Latin1 characters for each comparison
         //   tmpU:  receives every second group of 4 UTF-16 characters
         //   DIFF1: branch target when tmpU and tmpL differ
         //   DIFF2: branch target when tmp3 and tmpL differ
         // On fall-through tmp3 again holds 4 preloaded UTF-16 characters for the
         // next invocation; tmp2 is advanced by 16 bytes, cnt1 by 32 bytes.
         // rscratch2 holds the XOR of the last compared pair. vtmp (v1) and
         // vtmp3 (v2) are clobbered. No emitted instruction modifies the
         // condition flags.
         // NOTE(review): the tmp1 alias below is not used in this helper.
4045     Register cnt1 = r2, tmp1 = r10, tmp2 = r11, tmp3 = r12;
4046     FloatRegister vtmp = v1, vtmpZ = v0, vtmp3 = v2;
4047 
4048     __ ldrq(vtmp, Address(__ post(tmp2, 16)));  // 16 Latin1 characters, tmp2 += 16
4049     __ ldr(tmpU, Address(__ post(cnt1, 8)));    // UTF-16 characters 4..7, cnt1 += 8
4050     __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);       // widen Latin1 characters 0..7
4051     // vtmp3 now holds characters 0..7 converted to UTF-16; characters 8..15
         // are still Latin1 in the upper half of vtmp until zip2 below
4052 
4053     __ fmovd(tmpL, vtmp3);                      // converted characters 0..3
4054     __ eor(rscratch2, tmp3, tmpL);              // against the preloaded UTF-16 characters
4055     __ cbnz(rscratch2, DIFF2);
4056 
4057     __ ldr(tmp3, Address(__ post(cnt1, 8)));    // UTF-16 characters 8..11
4058     __ umov(tmpL, vtmp3, __ D, 1);              // converted characters 4..7
4059     __ eor(rscratch2, tmpU, tmpL);
4060     __ cbnz(rscratch2, DIFF1);
4061 
4062     __ zip2(vtmp, __ T16B, vtmp, vtmpZ);        // widen Latin1 characters 8..15
4063     __ ldr(tmpU, Address(__ post(cnt1, 8)));    // UTF-16 characters 12..15
4064     __ fmovd(tmpL, vtmp);                       // converted characters 8..11
4065     __ eor(rscratch2, tmp3, tmpL);
4066     __ cbnz(rscratch2, DIFF2);
4067 
4068     __ ldr(tmp3, Address(__ post(cnt1, 8)));    // preload for the next invocation
4069     __ umov(tmpL, vtmp, __ D, 1);               // converted characters 12..15
4070     __ eor(rscratch2, tmpU, tmpL);
4071     __ cbnz(rscratch2, DIFF1);
4072   }
4073 
4074   // r0  = result
4075   // r1  = str1
4076   // r2  = cnt1
4077   // r3  = str2
4078   // r4  = cnt2
4079   // r10 = tmp1
4080   // r11 = tmp2


































































































































































































4081   address generate_compare_long_string_different_encoding(bool isLU) {
         // Generates the stub that compares a Latin1 string with a UTF-16
         // string and returns the stub's entry address.
         //   isLU == true:  str1 is Latin1, str2 is UTF-16
         //   isLU == false: str1 is UTF-16, str2 is Latin1
         // Register usage of the generated code is listed in the comment above
         // this function. On entry the first 4 characters of both strings are
         // expected to be loaded already: the Latin1 ones in vtmp (v1), the
         // UTF-16 ones in tmp2 (LU) or tmp1 (UL). cnt2 is the number of
         // characters to compare.
         // result (r0) is written only when a differing character is found
         // (str1 character minus str2 character); if no difference is found it
         // is returned exactly as supplied by the caller.
4082     __ align(CodeEntryAlignment);
4083     StubCodeMark mark(this, "StubRoutines", isLU
4084         ? "compare_long_string_different_encoding LU"
4085         : "compare_long_string_different_encoding UL");
4086     address entry = __ pc();
4087     Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
4088         DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, NO_PREFETCH,
4089         LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
         // NOTE(review): TAIL_LOAD_16 is declared but never bound or used below.
4090     Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
4091         tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
         // vtmp3 (v2) is not referenced by name in this function; v2 is used as
         // scratch by compare_string_16_x_LU.
4092     FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
         // tmp3 and tmp4 are saved on the stack while in use and restored on
         // every path that leaves the comparison loops
4093     RegSet spilled_regs = RegSet::of(tmp3, tmp4);
4094 
         // minimum number of remaining characters for another iteration of the
         // prefetching 64-character loop
4095     int prefetchLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance/2);
4096 
4097     __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ); // vtmpZ = 0, used to widen Latin1 to UTF-16
4098     // cnt2 == amount of characters left to compare
4099     // Check already loaded first 4 symbols(vtmp and tmp2(LU)/tmp1(UL))
4100     __ zip1(vtmp, __ T8B, vtmp, vtmpZ); // 4 Latin1 characters -> 4 UTF-16 characters
         // skip the bytes that were loaded already: 4 for Latin1, 8 for UTF-16
4101     __ add(str1, str1, isLU ? wordSize/2 : wordSize);
4102     __ add(str2, str2, isLU ? wordSize : wordSize/2);
4103     __ fmovd(isLU ? tmp1 : tmp2, vtmp); // widened characters into the GPR of the Latin1 string
4104     __ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
4105     __ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1); // str1 -> last 4 characters to compare
4106     __ eor(rscratch2, tmp1, tmp2); // non-zero if the first 4 characters differ
4107     __ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0); // str2 -> last 4 characters to compare
4108     __ mov(rscratch1, tmp2); // CALCULATE_DIFFERENCE reads str2 data from rscratch1
4109     __ cbnz(rscratch2, CALCULATE_DIFFERENCE);
         // From here on data of the 1st string is kept in tmp1 and data of the
         // 2nd string in rscratch1; the aliases below name them by encoding.
4110     Register strU = isLU ? str2 : str1,
4111              strL = isLU ? str1 : str2,
4112              tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
4113              tmpL = isLU ? tmp1 : rscratch1; // where to keep L for comparison


4114     __ push(spilled_regs, sp);
         // tmp2 and cnt1 are re-used as running load pointers from here on
4115     __ sub(tmp2, strL, cnt2); // strL pointer to load from
4116     __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
4117 
         // preload 4 UTF-16 characters as expected by compare_string_16_x_LU
4118     __ ldr(tmp3, Address(__ post(cnt1, 8)));

4119 
4120     if (SoftwarePrefetchHintDistance >= 0) {
4121       __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
4122       __ br(__ LT, NO_PREFETCH); // too few characters for the prefetching loop
4123       __ bind(LARGE_LOOP_PREFETCH); // 64 characters per iteration
4124         __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
4125         __ mov(tmp4, 2); // inner loop: 2 x 16 characters
4126         __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
4127         __ bind(LARGE_LOOP_PREFETCH_REPEAT1);
4128           compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4129           __ subs(tmp4, tmp4, 1);
4130           __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
               // second prefetch of the UTF-16 string only: it advances twice
               // as many bytes per character as the Latin1 string
4131           __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
4132           __ mov(tmp4, 2); // second inner loop: 2 x 16 characters
4133         __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
4134           compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4135           __ subs(tmp4, tmp4, 1);
4136           __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
4137           __ sub(cnt2, cnt2, 64); // 64 characters were compared in this iteration
4138           __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
4139           __ br(__ GE, LARGE_LOOP_PREFETCH);
4140     }
4141     __ cbz(cnt2, LOAD_LAST); // no characters left except last load
4142     __ bind(NO_PREFETCH);



         // cnt2 is kept biased by -16 while in the 16-character loop
4143     __ subs(cnt2, cnt2, 16);
4144     __ br(__ LT, TAIL);
4145     __ bind(SMALL_LOOP); // smaller loop
           // flags set by this subs are consumed by the branch after the
           // helper; the helper emits no flag-setting instructions
4146       __ subs(cnt2, cnt2, 16);
4147       compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4148       __ br(__ GE, SMALL_LOOP);
4149       __ cmn(cnt2, (u1)16); // cnt2 == -16: only the last 4 characters remain
4150       __ br(__ EQ, LOAD_LAST);
4151     __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
           // cnt2 is negative here: move both pointers back so that the final
           // 16-character block ends right before the last 4 characters (it may
           // overlap characters that were compared already)
4152       __ add(cnt1, cnt1, cnt2, __ LSL, 1); // Address of 8 bytes before last 4 characters in UTF-16 string
4153       __ add(tmp2, tmp2, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string
4154       __ ldr(tmp3, Address(cnt1, -8)); // redo the preload for the adjusted pointer
4155       compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // last 16 characters before last load
4156       __ b(LOAD_LAST);
4157     __ bind(DIFF2);
4158       __ mov(tmpU, tmp3); // mismatch was found in tmp3: move it to where DIFF1 expects it
4159     __ bind(DIFF1);
4160       __ pop(spilled_regs, sp);
4161       __ b(CALCULATE_DIFFERENCE);
4162     __ bind(LOAD_LAST);
4163       // Last 4 UTF-16 characters are already pre-loaded into tmp3 by compare_string_16_x_LU.
4164       // No need to load it again
4165       __ mov(tmpU, tmp3);
4166       __ pop(spilled_regs, sp);
4167 
4168       __ ldrs(vtmp, Address(strL)); // last 4 Latin1 characters (4 bytes)
4169       __ zip1(vtmp, __ T8B, vtmp, vtmpZ); // widen to UTF-16
4170       __ fmovd(tmpL, vtmp);
4171 
4172       __ eor(rscratch2, tmpU, tmpL);
4173       __ cbz(rscratch2, DONE); // no difference: result is left untouched
4174 
4175     // Find the first different characters in the longwords and
4176     // compute their difference.
4177     __ bind(CALCULATE_DIFFERENCE);
           // rscratch2 = XOR of the differing 8-byte chunks; tmp1 / rscratch1
           // hold the chunk of the 1st / 2nd string
4178       __ rev(rscratch2, rscratch2); // byte-reverse so clz finds the lowest differing byte
4179       __ clz(rscratch2, rscratch2);
4180       __ andr(rscratch2, rscratch2, -16); // round down to a 16-bit character boundary
4181       __ lsrv(tmp1, tmp1, rscratch2); // first differing character of str1 -> bits 0..15
4182       __ uxthw(tmp1, tmp1);
4183       __ lsrv(rscratch1, rscratch1, rscratch2); // same for str2
4184       __ uxthw(rscratch1, rscratch1);
4185       __ subw(result, tmp1, rscratch1); // character difference
4186     __ bind(DONE);
4187       __ ret(lr);
4188     return entry;
4189   }
4190 
4191   // r0  = result
4192   // r1  = str1
4193   // r2  = cnt1
4194   // r3  = str2
4195   // r4  = cnt2
4196   // r10 = tmp1
4197   // r11 = tmp2


























































































































































4198   address generate_compare_long_string_same_encoding(bool isLL) {
         // Generates the stub that compares two strings of the same encoding
         // (isLL: both Latin1, otherwise both UTF-16) and returns the stub's
         // entry address. Register usage of the generated code is listed in the
         // comment above this function.
         // On entry the first 8 bytes of each string are expected to be loaded
         // already in tmp1 (1st string) and tmp2 (2nd string); cnt2 is the
         // number of characters to compare.
         // result (r0) is written only when a differing character is found
         // (str1 character minus str2 character); otherwise it is returned
         // exactly as supplied by the caller.



4199     __ align(CodeEntryAlignment);
4200     StubCodeMark mark(this, "StubRoutines", isLL
4201         ? "compare_long_string_same_encoding LL"
4202         : "compare_long_string_same_encoding UU");
4203     address entry = __ pc();
4204     Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
4205         tmp1 = r10, tmp2 = r11;
4206     Label SMALL_LOOP, LARGE_LOOP_PREFETCH, CHECK_LAST, DIFF2, TAIL,
4207         LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF,
4208         DIFF_LAST_POSITION, DIFF_LAST_POSITION2;
         // NOTE(review): DIFF_LAST_POSITION and DIFF_LAST_POSITION2 are declared
         // but never bound or used below.
4209     // exit from large loop when less than 64 bytes left to read or we're about
4210     // to prefetch memory behind array border
         // (the byte distance is converted to characters for UTF-16)
4211     int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
4212     // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
4213     // update cnt2 counter with already loaded 8 bytes
4214     __ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2)); // 8 Latin1 or 4 UTF-16 characters
4215     // update pointers, because of previous read
4216     __ add(str1, str1, wordSize);
4217     __ add(str2, str2, wordSize);
4218     if (SoftwarePrefetchHintDistance >= 0) {
4219       __ bind(LARGE_LOOP_PREFETCH); // 4 x 16 = 64 bytes per iteration
4220         __ prfm(Address(str1, SoftwarePrefetchHintDistance));
4221         __ prfm(Address(str2, SoftwarePrefetchHintDistance));
4222         compare_string_16_bytes_same(DIFF, DIFF2);
4223         compare_string_16_bytes_same(DIFF, DIFF2);
4224         __ sub(cnt2, cnt2, isLL ? 64 : 32); // 64 bytes expressed in characters
4225         compare_string_16_bytes_same(DIFF, DIFF2);
             // flags set here are consumed by the br below; the helper in
             // between emits no flag-setting instructions
4226         __ subs(rscratch2, cnt2, largeLoopExitCondition);
4227         compare_string_16_bytes_same(DIFF, DIFF2);
4228         __ br(__ GT, LARGE_LOOP_PREFETCH);
4229         __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
4230     }
4231     // less than 16 bytes left?
         // cnt2 is kept biased by -16 bytes (in characters) while in SMALL_LOOP
4232     __ subs(cnt2, cnt2, isLL ? 16 : 8);
4233     __ br(__ LT, TAIL);
4234     __ bind(SMALL_LOOP); // 16 bytes per iteration
4235       compare_string_16_bytes_same(DIFF, DIFF2);
4236       __ subs(cnt2, cnt2, isLL ? 16 : 8);
4237       __ br(__ GE, SMALL_LOOP);


4238     __ bind(TAIL);
4239       __ adds(cnt2, cnt2, isLL ? 16 : 8); // undo the bias: characters not yet loaded
4240       __ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF); // only the pending tmp1/tmp2 pair remains
4241       __ subs(cnt2, cnt2, isLL ? 8 : 4);
4242       __ br(__ LE, CHECK_LAST); // at most 8 bytes left to load
           // more than 8 bytes left: compare the pending pair, load the next 8 bytes
4243       __ eor(rscratch2, tmp1, tmp2);
4244       __ cbnz(rscratch2, DIFF);
4245       __ ldr(tmp1, Address(__ post(str1, 8)));
4246       __ ldr(tmp2, Address(__ post(str2, 8)));
4247       __ sub(cnt2, cnt2, isLL ? 8 : 4);
4248     __ bind(CHECK_LAST);
           // cnt2 is <= 0 on both paths reaching this label; it is used below
           // as a (non-positive) byte offset so that the final 8-byte load ends
           // exactly at the end of the compared range, possibly overlapping
           // bytes that were compared already
4249       if (!isLL) {
4250         __ add(cnt2, cnt2, cnt2); // now in bytes
4251       }
4252       __ eor(rscratch2, tmp1, tmp2); // compare the pending pair first
4253       __ cbnz(rscratch2, DIFF);
4254       __ ldr(rscratch1, Address(str1, cnt2)); // last 8 bytes of the 1st string
4255       __ ldr(cnt1, Address(str2, cnt2)); // last 8 bytes of the 2nd string
4256       __ eor(rscratch2, rscratch1, cnt1);
4257       __ cbz(rscratch2, LENGTH_DIFF); // no difference: result is left untouched
4258       // Find the first different characters in the longwords and
4259       // compute their difference.
4260     __ bind(DIFF2); // differing data is in rscratch1 (1st string) / cnt1 (2nd string)
4261       __ rev(rscratch2, rscratch2); // byte-reverse so clz finds the lowest differing byte
4262       __ clz(rscratch2, rscratch2);
4263       __ andr(rscratch2, rscratch2, isLL ? -8 : -16); // round down to a character boundary
4264       __ lsrv(rscratch1, rscratch1, rscratch2);
4265       if (isLL) {
4266         __ lsrv(cnt1, cnt1, rscratch2);
4267         __ uxtbw(rscratch1, rscratch1); // keep one Latin1 character
4268         __ uxtbw(cnt1, cnt1);
4269       } else {
4270         __ lsrv(cnt1, cnt1, rscratch2);
4271         __ uxthw(rscratch1, rscratch1); // keep one UTF-16 character
4272         __ uxthw(cnt1, cnt1);
4273       }
4274       __ subw(result, rscratch1, cnt1); // character difference
4275       __ b(LENGTH_DIFF);
4276     __ bind(DIFF); // differing data is in tmp1 (1st string) / tmp2 (2nd string)
4277       __ rev(rscratch2, rscratch2);
4278       __ clz(rscratch2, rscratch2);
4279       __ andr(rscratch2, rscratch2, isLL ? -8 : -16);
4280       __ lsrv(tmp1, tmp1, rscratch2);
4281       if (isLL) {
4282         __ lsrv(tmp2, tmp2, rscratch2);
4283         __ uxtbw(tmp1, tmp1);
4284         __ uxtbw(tmp2, tmp2);
4285       } else {
4286         __ lsrv(tmp2, tmp2, rscratch2);
4287         __ uxthw(tmp1, tmp1);
4288         __ uxthw(tmp2, tmp2);
4289       }
4290       __ subw(result, tmp1, tmp2); // character difference
4291       __ b(LENGTH_DIFF);
4292     __ bind(LAST_CHECK_AND_LENGTH_DIFF);
           // compare the last pending pair; fall through to return if equal
4293       __ eor(rscratch2, tmp1, tmp2);
4294       __ cbnz(rscratch2, DIFF);
4295     __ bind(LENGTH_DIFF);
4296       __ ret(lr);
4297     return entry;
4298   }
4299 
4300   void generate_compare_long_strings() {
           // Generates the four long-string compare stubs, one per encoding
           // combination of the two strings (L = Latin1, U = UTF-16), and stores
           // their entry addresses in the corresponding StubRoutines::aarch64
           // fields. The stubs are emitted in the order LL, UU, LU, UL.
4301       StubRoutines::aarch64::_compare_long_string_LL
4302           = generate_compare_long_string_same_encoding(true);
4303       StubRoutines::aarch64::_compare_long_string_UU
4304           = generate_compare_long_string_same_encoding(false);
4305       StubRoutines::aarch64::_compare_long_string_LU
4306           = generate_compare_long_string_different_encoding(true);
4307       StubRoutines::aarch64::_compare_long_string_UL
4308           = generate_compare_long_string_different_encoding(false);
4309   }
4310 
4311   // R0 = result
4312   // R1 = str2
4313   // R2 = cnt1
4314   // R3 = str1
4315   // R4 = cnt2




4009     __ generate_dsin_dcos(isCos, (address)StubRoutines::aarch64::_npio2_hw,
4010         (address)StubRoutines::aarch64::_two_over_pi,
4011         (address)StubRoutines::aarch64::_pio2,
4012         (address)StubRoutines::aarch64::_dsin_coef,
4013         (address)StubRoutines::aarch64::_dcos_coef);
4014     return start;
4015   }
4016 
4017   address generate_dlog() {
         // Generates the "dlog" stub and returns its entry address. All
         // instructions are emitted by the macro assembler's fast_log(); this
         // function only aligns the entry point, registers the stub name and
         // chooses the registers passed to fast_log().
4018     __ align(CodeEntryAlignment);
4019     StubCodeMark mark(this, "StubRoutines", "dlog");
4020     address entry = __ pc(); // entry point, taken after the alignment padding
         // Vector temporaries v0..v5 and v16..v19, general purpose temporaries
         // r0..r4.
         // NOTE(review): presumably the argument/result live in v0 and all listed
         // registers are clobbered by fast_log() - confirm against fast_log().
4021     FloatRegister vtmp0 = v0, vtmp1 = v1, vtmp2 = v2, vtmp3 = v3, vtmp4 = v4,
4022         vtmp5 = v5, tmpC1 = v16, tmpC2 = v17, tmpC3 = v18, tmpC4 = v19;
4023     Register tmp1 = r0, tmp2 = r1, tmp3 = r2, tmp4 = r3, tmp5 = r4;
4024     __ fast_log(vtmp0, vtmp1, vtmp2, vtmp3, vtmp4, vtmp5, tmpC1, tmpC2, tmpC3,
4025         tmpC4, tmp1, tmp2, tmp3, tmp4, tmp5);
4026     return entry;
4027   }
4028 
4029   // Summary: part of string compareTo implementation. Called for code generation in multiple points.
4030   //          1) load 8 bytes and advance pointers of both strings and compare
4031   //             previously loaded 8 bytes. jump to DIFF1 if different characters found
4032   //          2) load 8 bytes and advance pointers of both strings and compare
4033   //             previously loaded 8 bytes. jump to DIFF2 if different characters found
4034   //
4035   // Input:
4036   //          str1  (r1): pointer for next load from 1st string
4037   //          cnt1  (r2): register to use for loading data from 2nd string
4038   //          str2  (r3): pointer for next load from 2nd string
4039   //          tmp1 (r10): already loaded 8 bytes of 1st string.
4040   //          tmp2 (r11): already loaded 8 bytes of 2nd string.
4041   //
4042   // Output:
4043   //          rscratch2: result of last comparison
4044   //          tmp1, tmp2: contains different parts of 1st and 2nd strings if exit via DIFF1 label. Not used for normal and DIFF2 exits
4045   //          rscratch1, cnt1: contains different parts of 1st and 2nd strings if exit via DIFF2 label. Not used for normal and DIFF1 exits
4046   //
4047   void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
         // The sequence is software-pipelined: every comparison works on the
         // data loaded by the previous step while the next data is being loaded.
         // On fall-through tmp1 / tmp2 hold 8 loaded but not yet compared bytes.
         // No emitted instruction modifies the condition flags, so callers may
         // set flags before this sequence and branch on them afterwards.
4048     Register str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
4049     __ ldr(rscratch1, Address(__ post(str1, 8))); // next 8 bytes of 1st string, str1 += 8
4050     __ eor(rscratch2, tmp1, tmp2);                // compare the previously loaded pair
4051     __ ldr(cnt1, Address(__ post(str2, 8)));      // next 8 bytes of 2nd string, str2 += 8
4052     __ cbnz(rscratch2, DIFF1);
4053     __ ldr(tmp1, Address(__ post(str1, 8)));      // preload for the next step / invocation
4054     __ eor(rscratch2, rscratch1, cnt1);           // compare the pair loaded above
4055     __ ldr(tmp2, Address(__ post(str2, 8)));
4056     __ cbnz(rscratch2, DIFF2);
4057   }
4058 
4059   // Summary: part of string compare implementation. Called for code generation in multiple points.
4060   //          - expecting 4 UTF-16 string characters preloaded into tmp3
4061   //          - load 16 characters from each string
4062   //          - convert Latin1 characters to UTF-16
4063   //          - compare preloaded 4 characters with 4 first converted Latin1 characters
4064   //          - compare next 12 loaded and converted characters from each string
4065   //          - compared data is in tmpU and tmpL registers or in tmp3 and tmpL
4066   //          - in case different characters are found while comparing tmpU and
4067   //            tmpL, jumps to DIFF1. Jumps to DIFF2 in case different character
4068   //            was found while comparing tmp3 and tmpL
4069   //          - string pointers are increased by amount of loaded bytes
4070   //
4071   // Input:
4072   //          strUnext  (r2): pointer for next load from UTF-16 string
4073   //          strLnext (r11): pointer for next load from Latin1 string
4074   //          tmp3     (r12): used to store parts of UTF-16 string
4075   //          vtmpZ     (v0): zeroed register for conversion from Latin1 to UTF-16
4076   //
4077   // Temporary registers:
4078   //          vtmp      (v1): used to load 16 Latin1 characters and part of converted Latin1 string
4079   //          vtmp3     (v2): used for part of converted Latin1 string
4080   //
4081   // Output: rscratch2: result of last comparison
4082   //         tmpL: last compared part of converted Latin1 string
4083   //         tmpU: in case of exit via DIFF1 or normal exit: contains last compared part of UTF-16 string.
4084   //               Contains part of UTF-16 string compared before last comparison otherwise.
4085   //         tmp3: in case of exit via DIFF2: contains last compared part of UTF-16 string.
4086   //               in case of exit via DIFF1: contains part of UTF-16 string compared before last comparison.
4087   //               in case of normal exit: contains preloaded 8 bytes of UTF-16 string for next comparisons
4088   //
4089   // Parameters:
4090   //          tmpL: holds parts of converted Latin1 string
4091   //          tmpU: holds parts of UTF-16 string
4092   //          DIFF1: label to jump to if different characters are found in tmpU and tmpL
4093   //          DIFF2: label to jump to if different characters are found in tmp3 and tmpL
4094   //
4095   void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
4096       Label &DIFF2) {
         // The UTF-16 loads alternate between tmpU and tmp3 so that each
         // comparison uses data loaded one step earlier. In total 16 bytes are
         // loaded from strLnext and 32 bytes from strUnext. No emitted
         // instruction modifies the condition flags.
         // NOTE(review): the tmp1 alias below is not used in this helper.
4097     Register strUnext = r2, tmp1 = r10, strLnext = r11, tmp3 = r12;
4098     FloatRegister vtmp = v1, vtmpZ = v0, vtmp3 = v2;
4099 
4100     __ ldrq(vtmp, Address(__ post(strLnext, 16))); // 16 Latin1 characters, strLnext += 16
4101     __ ldr(tmpU, Address(__ post(strUnext, 8)));   // UTF-16 characters 4..7, strUnext += 8
4102     __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);          // widen Latin1 characters 0..7
4103     // vtmp3 now holds characters 0..7 converted to UTF-16; characters 8..15
         // are still Latin1 in the upper half of vtmp until zip2 below
4104 
4105     __ fmovd(tmpL, vtmp3);                         // converted characters 0..3
4106     __ eor(rscratch2, tmp3, tmpL);                 // against the preloaded UTF-16 characters
4107     __ cbnz(rscratch2, DIFF2);
4108 
4109     __ ldr(tmp3, Address(__ post(strUnext, 8)));   // UTF-16 characters 8..11
4110     __ umov(tmpL, vtmp3, __ D, 1);                 // converted characters 4..7
4111     __ eor(rscratch2, tmpU, tmpL);
4112     __ cbnz(rscratch2, DIFF1);
4113 
4114     __ zip2(vtmp, __ T16B, vtmp, vtmpZ);           // widen Latin1 characters 8..15
4115     __ ldr(tmpU, Address(__ post(strUnext, 8)));   // UTF-16 characters 12..15
4116     __ fmovd(tmpL, vtmp);                          // converted characters 8..11
4117     __ eor(rscratch2, tmp3, tmpL);
4118     __ cbnz(rscratch2, DIFF2);
4119 
4120     __ ldr(tmp3, Address(__ post(strUnext, 8)));   // preload for the next invocation
4121     __ umov(tmpL, vtmp, __ D, 1);                  // converted characters 12..15
4122     __ eor(rscratch2, tmpU, tmpL);
4123     __ cbnz(rscratch2, DIFF1);
4124   }
4125 
4126   // Summary: Compare long strings intrinsic implementation for different encodings.
4127   //          Comparison is performed in lexical order.
4128   //
4129   // Prerequisites: string length >= 72 characters
4130   //
4131   // Input: result (r0): length difference
4132   //          str1 (r1): pointer to 1st string
4133   //          str2 (r3): pointer to 2nd string
4134   //          cnt1 (r2): number of characters in 1st string
4135   //          cnt2 (r4): minimum of str1 and str2 length. Used as counter
4136   //          tmp1 (r10): first 8 bytes of 1st string, if the 1st string is the UTF-16 one (UL case)
4137   //          tmp2 (r11): first 8 bytes of 2nd string, if the 2nd string is the UTF-16 one (LU case)
4138   //          vtmpZ (v0): used to convert encodings by providing zero values
4139   //          vtmp  (v1): starting bytes of Latin1 string. Also used as temporary register
4140   //          vtmp3 (v2): temporary register
4141   //
4142   // Temporary registers:
4143   //          rscratch1, rscratch2: clobbered on exit
4144   //          preloadedChunk (r12), smallLoopCounter (r14): pushed on stack, then restored on exit
4145   //
4146   //
4147   // Output:  result - return 0 if strings are equal. Returns positive value
4148   //          if 1st string > 2nd string in lexical order. Return
4149   //          negative value if 1st string < 2nd string.
4150   //
4151   // Side effects: str1, str2, cnt1, cnt2, tmp1, tmp2, rscratch1, rscratch2: clobbered.
4152   //
4153   // Algorithm parameters:
4154   //          isLU: true if 1st string is Latin1.
4155   //
4156   // Calculated constants:
4157   //          prefetchLoopExitCondition: Exit condition for loop with prefetch.
4158   //
4159   //
4160   // PSEUDO CODE:
4161   //   // Code below uses <compare_string_16_x_LU> code block which:
4162   //   // - loads 16 Latin1 characters at once. Then converts it to UTF-16 and move to GPR
4163   //   // - issues 4 smaller loads of 4 UTF-16 characters and for each load compare it with converted Latin1 characters
4164   //   // - smaller loads are using 2 different registers to break register dependencies
4165   //   // - jump to DIFF1 or DIFF2 label depending on which register has a character different from converted Latin1 character
4166   //
4167   //   <push preloadedChunk and smallLoopCounter on stack>;
4168   //   <convert already loaded Latin1 characters to UTF-16 and compare it>;
4169   //   <advance string pointers by the number of loaded byte>;
4170   //   <calculate strUnext and strLnext == pointers to load next chunks from UTF-16 and Latin1 strings>;
4171   //   <preload first 4 UTF characters>;
4172   //   cnt2 = cnt2 - 4;                              // keep characters counter reduced by 4, because last 4 characters are compared separately
4173   //   if (SoftwarePrefetchHintDistance >= 0) {      // need prefetch
4174   //     if (cnt2 < prefetchLoopExitCondition) goto NO_PREFETCH; // don't use loop with prefetch in case prefetch distance is too far away
4175   //     do {                                        // 64-characters loop with prefetch.
4176   //       // Each iteration has 2 prefetch instructions for UTF-16 string and 1 for Latin1 string
4177   //       // contains 2-iterations loops (16 characters each) between prefetch instructions
4178   //       // to avoid huge code generation
4179   //       <prefetch strLnext at SoftwarePrefetchHintDistance>;
4180   //       <prefetch strUnext at SoftwarePrefetchHintDistance>;
4181   //       for (smallLoopCounter = 0; smallLoopCounter < 2; smallLoopCounter++) {
4182   //         <compare_string_16_x_LU>;
4183   //       }
4184   //       <prefetch strUnext at SoftwarePrefetchHintDistance>;
4185   //       for (smallLoopCounter = 0; smallLoopCounter < 2; smallLoopCounter++) {
4186   //         <compare_string_16_x_LU>;
4187   //       }
4188   //       cnt2 = cnt2 - 64;                          // update counter by the number of loaded characters
4189   //     } while(cnt2 >= prefetchLoopExitCondition);
4190   //   }
4191   //   if (cnt2 == 0) goto LOAD_LAST;                 // load and compare last 4 characters
4192   //  NO_PREFETCH:
4193   //   if (<less than 16 characters left to load>) goto TAIL;
4194   //
4195   //   // smaller by-16-characters loop
4196   //   do {
4197   //     cnt2 = cnt2 - 16;
4198   //     <compare_string_16_x_LU>;
4199   //   } while(<has at least 16 characters to load>);
4200   //
4201   //   if (cnt2 == 0) goto LOAD_LAST;
4202   // TAIL:
4203   //   <adjust pointers to point to 16 characters before last load>;
4204   //   <preload first 4 UTF characters>
4205   //   <load and compare 16 characters before last load>;
4206   //   goto LOAD_LAST;
4207   // DIFF2:
4208   //   <move UTF-16 string data from preloadedChunk to the comparison register used by DIFF1>;
4209   //   // fallthrough
4210   // DIFF1:
4211   //   <pop preloadedChunk and smallLoopCounter from stack>;
4212   //   goto CALCULATE_DIFFERENCE;
4213   // LOAD_LAST:
4214   //   <pop preloadedChunk and smallLoopCounter from stack>;
4215   //   <load and compare last 4 characters>;
4216   //   if (<difference not found>) return;           // result = already calculated length difference
4217   // CALCULATE_DIFFERENCE:
4218   //   result = <calculate character difference>;
4219   // DONE:
4220   //   return;
4221   //
4222   //
4223   //
4224   //
4225   //
4226   // DETAILED CODE:
4227   //  vtmpZ = 0;                                     // used to convert encodings
4228   //  vtmp = CONVERT_LATIN1_TO_UTF16(vtmp, vtmpZ);   // implemented as zip1 instruction
4229   //
4230   //  // update string pointers by the number of loaded bytes
4231   //  str1 = str1 + (isLU ? wordSize/2 : wordSize);
4232   //  str2 = str2 + (isLU ? wordSize : wordSize/2);
4233   //
4234   //  // copy converted string into GPR
4235   //  if (isLU) tmp1 = vtmp;
4236   //  else tmp2 = vtmp;
4237   //
4238   //  cnt2 = cnt2 - 8;                                // reduce cnt2 by the number of already loaded characters. And reduce by 4 more characters
4239   //  str1 = str1 + cnt2 << (isLU ? 0 : 1);           // address of 1st string last 4 characters
4240   //  rscratch2 = BIT_XOR(tmp1, tmp2);                // begin loaded chunks comparison
4241   //  str2 = str2 + cnt2 << (isLU ? 1 : 0);           // address of 2nd string last 4 characters
4242   //  rscratch1 = tmp2;                               // copy 2nd string chunk
4243   //  if (rscratch2 != 0) goto CALCULATE_DIFFERENCE;  // found different character
4244   //
4245   //  // several redefinitions below to have meaningful names
4246   //  void* strU = isLU ? str2 : str1;                // UTF-16 string pointer to last 4 characters
4247   //  void* strL = isLU ? str1 : str2;                // Latin1 string pointer to last 4 characters
4248   //  long tmpU = isLU ? rscratch1 : tmp1;            // UTF-16 characters holder
4249   //  long tmpL = isLU ? tmp1 : rscratch1;            // Latin1 characters holder
4250   //  void* strLnext = tmp2;                          // Latin1 string pointer to load next character(s)
4251   //  void* strUnext = cnt1;                          // UTF-16 string pointer to load next character(s)
4252   //
4253   //  PUSH_ON_STACK(preloadedChunk, smallLoopCounter);
4254   //  strLnext = strL - cnt2;                         // initialize pointer to Latin1 string next load
4255   //  strUnext = strU - cnt2 << 1;                    // initialize pointer to UTF-16 string next load
4256   //
4257   //  preloadedChunk = LOAD8BYTES(strUnext, 8);       // pre-load next 8 bytes of UTF-16 string
4258   //  strUnext = strUnext + 8;                        // merged with load above as post-increment
4259   //
4260   //  if (SoftwarePrefetchHintDistance >= 0) {
4261   //    rscratch2 = cnt2 - prefetchLoopExitCondition;
4262   //    if (rscratch2 < 0) goto NO_PREFETCH;
4263   //   LARGE_LOOP_PREFETCH:                           // 64-characters loop
4264   //    PREFETCH(strLnext, SoftwarePrefetchHintDistance);
4265   //    smallLoopCounter = 2;                         // initialize inner loop counter
4266   //    PREFETCH(strUnext, SoftwarePrefetchHintDistance);
4267   //   LARGE_LOOP_PREFETCH_REPEAT1: {                 // 16 characters inner loop with 2 iterations
4268   //      compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // see compare_string_16_x_LU comments
4269   //      smallLoopCounter--;
4270   //      bool smallLoopRepeat = (smallLoopCounter > 0); // kept in flags
4271   //      if (smallLoopRepeat) goto LARGE_LOOP_PREFETCH_REPEAT1;
4272   //   }
4273   //    PREFETCH(strUnext, SoftwarePrefetchHintDistance);
4274   //    smallLoopCounter = 2;                         // initialize inner loop counter
4275   //   LARGE_LOOP_PREFETCH_REPEAT2: {                 // one more 16 characters inner loop with 2 iterations
4276   //      compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // see compare_string_16_x_LU comments
4277   //      smallLoopCounter--;
4278   //      bool smallLoopRepeat = (smallLoopCounter > 0); // kept in flags
4279   //      if (smallLoopRepeat) goto LARGE_LOOP_PREFETCH_REPEAT2;
4280   //   }
4281   //    cnt2 = cnt2 - 64;
4282   //    rscratch2 = cnt2 - prefetchLoopExitCondition;
4283   //    if (rscratch2 >= 0) goto LARGE_LOOP_PREFETCH;
4284   //  } // end of 64-characters loop
4285   //
4286   //  if (cnt2 == 0) goto LOAD_LAST;                  // no more characters left except last 4 characters reserved earlier
4287   // NO_PREFETCH:                                     // none of the further loads requires a prefetch instruction
4288   //  cnt2 = cnt2 - 16;                               // keep cnt2 counter reduced by 16
4289   //  if (cnt2 < 0) goto TAIL;                        // less than 16 characters left to load until last 4 reserved characters
4290   // SMALL_LOOP:                                      // 16-characters loop
4291   //  cnt2 = cnt2 - 16;                               // decrement counter by the 16 characters compared below; sets flags
4292   //  compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // does not modify flags
4293   //  bool repeatSmallLoop = cnt2 >= 0;               // kept in flags
4294   //  if (repeatSmallLoop) goto SMALL_LOOP;
4295   //  if (cnt2 == -16) goto LOAD_LAST;
4296   // TAIL:
4297   //  strUnext = strUnext + cnt2 << 1;                // pointer to UTF-16 last 16 characters + 8 bytes
4298   //  strLnext = strLnext + cnt2;                     // pointer to Latin1 last 16 characters
4299   //  preloadedChunk = LOAD8BYTES(strUnext, -8);
4300   //  compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4301   //  goto LOAD_LAST;
4302   // DIFF2:
4303   //  tmpU = preloadedChunk;
4304   // DIFF1:
4305   //  pop(preloadedChunk, smallLoopCounter);
4306   //  goto CALCULATE_DIFFERENCE;
4307   // LOAD_LAST:
4308   //  tmpU = preloadedChunk;                         // already loaded last 4 UTF-16 characters. Just copy to required register
4309   //  pop(preloadedChunk, smallLoopCounter);
4310   //  vtmp = LOAD4BYTES(strL);
4311   //  vtmp = CONVERT_LATIN1_TO_UTF16(vtmp, vtmpZ);
4312   //  tmpL = vtmp;
4313   //  rscratch2 = BIT_XOR(tmpU, tmpL);
4314   //  if (rscratch2 == 0) goto DONE;
4315   // CALCULATE_DIFFERENCE:
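4316   //  // No count trailing zeroes instruction is available. Reverse the byte order and count leading zeroes instead;
4316   //  // the count is rounded down to a character boundary below, so byte granularity is sufficient.
4317   //  rscratch2 = REVERSE_BYTES(rscratch2);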
4318   //  rscratch2 = COUNT_LEADING_ZEROES(rscratch2);
4319   //  rscratch2 = rscratch2 & -16;                   // clear lowest 4 bits to have number of bits until different character
4320   //  tmp1 = tmp1 >> rscratch2;                      // shift off same symbols from 1st string data
4321   //  tmp1 = UNSIGNED_EXTEND_SHORT2INT(tmp1);        // only first different symbol remains in 1st string data
4322   //  rscratch1 = rscratch1 >> rscratch2;            // shift off same symbols from 2nd string data
4323   //  rscratch1 = UNSIGNED_EXTEND_SHORT2INT(rscratch1); // only first different symbol remains in 2nd string data
4324   //  result = tmp1 - rscratch1;                     // character difference
4325   // DONE:
4326   //  return;
4327   address generate_compare_long_string_different_encoding(bool isLU) {
         // Emits the stub that compares a Latin1 string against a UTF-16 string.
         // isLU == true : str1 is Latin1 and str2 is UTF-16 (stub name "... LU").
         // isLU == false: str1 is UTF-16 and str2 is Latin1 (stub name "... UL").
         // Returns the entry address of the emitted code.
4328     __ align(CodeEntryAlignment);
4329     StubCodeMark mark(this, "StubRoutines", isLU
4330         ? "compare_long_string_different_encoding LU"
4331         : "compare_long_string_different_encoding UL");
4332     address entry = __ pc();
         // NOTE(review): TAIL_LOAD_16 is declared but never bound or branched to
         // in this function.
4333     Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
4334         DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, NO_PREFETCH,
4335         LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
         // preloadedChunk (r12) matches the tmp3 register that
         // compare_string_16_x_LU declares for itself; that helper also
         // hard-codes r11 and r2, which are used below as strLnext/strUnext.
4336     Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
4337         tmp1 = r10, tmp2 = r11, preloadedChunk = r12, smallLoopCounter = r14;
         // NOTE(review): vtmp3 is not referenced in this function
         // (compare_string_16_x_LU declares its own alias for v2).
4338     FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
         // r12 and r14 are pushed before the loops and popped on every path
         // that leaves them (DIFF1 and LOAD_LAST).
4339     RegSet spilled_regs = RegSet::of(preloadedChunk, smallLoopCounter);
4340 
         // Remaining-character threshold for entering and for repeating the
         // prefetching loop (compared against cnt2 below).
4341     int prefetchLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance/2);
4342 
         // vtmpZ = 0; it is the zero operand that zip1 interleaves with the
         // Latin1 bytes to widen them to 16-bit characters.
4343     __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
4344     // cnt2 == amount of characters left to compare
4345     // Check already loaded first 4 symbols(vtmp and tmp2(LU)/tmp1(UL))
4346     __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
         // Step both pointers past the 4 already loaded characters:
         // wordSize/2 bytes in the Latin1 string, wordSize bytes in the UTF-16 one.
4347     __ add(str1, str1, isLU ? wordSize/2 : wordSize);
4348     __ add(str2, str2, isLU ? wordSize : wordSize/2);
         // Move the widened Latin1 characters into the general register that
         // belongs to the Latin1 string (tmp1 for LU, tmp2 for UL).
4349     __ fmovd(isLU ? tmp1 : tmp2, vtmp);
4350     __ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
         // Advance str1/str2 by cnt2 characters (scaled by 2 for the UTF-16
         // string); LOAD_LAST later reads the final Latin1 characters through strL.
4351     __ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1);
         // XOR of the first 4 characters: non-zero means they differ.
4352     __ eor(rscratch2, tmp1, tmp2);
4353     __ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0);
         // CALCULATE_DIFFERENCE subtracts rscratch1 from tmp1, and tmp2 (r11) is
         // reused as strLnext below, so keep the second operand in rscratch1.
4354     __ mov(rscratch1, tmp2);
4355     __ cbnz(rscratch2, CALCULATE_DIFFERENCE);
4356     Register strU = isLU ? str2 : str1,
4357              strL = isLU ? str1 : str2,
4358              tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
4359              tmpL = isLU ? tmp1 : rscratch1, // where to keep L for comparison
4360              strLnext = tmp2,
4361              strUnext = cnt1;
4362     __ push(spilled_regs, sp);
         // Step back by cnt2 characters from the advanced pointers to obtain the
         // addresses of the next characters to load.
4363     __ sub(strLnext, strL, cnt2);            // strL pointer to load from
4364     __ sub(strUnext, strU, cnt2, __ LSL, 1); // strU pointer to load from
4365 
4366     // safe to read ahead 4 characters, because string length >= 72 characters
4367     __ ldr(preloadedChunk, Address(__ post(strUnext, 8)));
4368 
4369     if (SoftwarePrefetchHintDistance >= 0) {
           // Skip the prefetching loop when fewer than
           // prefetchLoopExitCondition characters remain.
4370       __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
4371       __ br(__ LT, NO_PREFETCH);
           // One outer iteration emits the 16-character comparison 2 + 2 times
           // (smallLoopCounter counts 2 down to 0 twice) and then subtracts 64
           // from cnt2.
4372       __ bind(LARGE_LOOP_PREFETCH);
4373         __ prfm(Address(strLnext, SoftwarePrefetchHintDistance));
4374         __ mov(smallLoopCounter, 2);
4375         __ prfm(Address(strUnext, SoftwarePrefetchHintDistance));
4376         __ bind(LARGE_LOOP_PREFETCH_REPEAT1);
4377           compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4378           __ subs(smallLoopCounter, smallLoopCounter, 1);
4379           __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
               // Second prefetch of the UTF-16 string within the same outer
               // iteration (the Latin1 string is prefetched once).
4380           __ prfm(Address(strUnext, SoftwarePrefetchHintDistance));
4381           __ mov(smallLoopCounter, 2);
4382         __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
4383           compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4384           __ subs(smallLoopCounter, smallLoopCounter, 1);
4385           __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
4386           __ sub(cnt2, cnt2, 64);
4387           __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
4388           __ br(__ GE, LARGE_LOOP_PREFETCH);
4389     }
4390     __ cbz(cnt2, LOAD_LAST); // no characters left except last load
4391     __ bind(NO_PREFETCH);
4392     // Load and compare cnt2 characters using 16 characters loop with
4393     // compare_string_16_x_LU primitive. In case 1..15 characters left:
4394     // use same compare_string_16_x_LU primitive with partial overlapping
         // cnt2 is kept biased by -16 from here on; a negative value means fewer
         // than 16 characters remain.
4395     __ subs(cnt2, cnt2, 16);
4396     __ br(__ LT, TAIL);
4397     __ bind(SMALL_LOOP); // smaller loop
           // The flags set by this subs are consumed by the br(GE) emitted after
           // the comparison code, so that code must leave the flags untouched.
4398       __ subs(cnt2, cnt2, 16);
4399       compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4400       __ br(__ GE, SMALL_LOOP);
           // cmn compares cnt2 with -16: equal means no partial group is left.
4401       __ cmn(cnt2, (u1)16);
4402       __ br(__ EQ, LOAD_LAST);
4403     __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
           // cnt2 is negative here, so both additions move the pointers backwards
           // and the final 16-character comparison overlaps compared data.
4404       __ add(strUnext, strUnext, cnt2, __ LSL, 1); // Address of 8 bytes before last 4 characters in UTF-16 string
4405       __ add(strLnext, strLnext, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string
           // Re-establish the preloaded UTF-16 chunk for the moved pointer.
4406       __ ldr(preloadedChunk, Address(strUnext, -8));
4407       compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // last 16 characters before last load
4408       __ b(LOAD_LAST);
         // DIFF2: the UTF-16 data to examine is in preloadedChunk; copy it to tmpU
         // and fall through. NOTE(review): presumably reached when the helper
         // compared against the preloaded chunk - confirm in compare_string_16_x_LU.
4409     __ bind(DIFF2);
4410       __ mov(tmpU, preloadedChunk);
         // DIFF1: differing chunks are in tmpU/tmpL; restore the spilled
         // registers before computing the result.
4411     __ bind(DIFF1);
4412       __ pop(spilled_regs, sp);
4413       __ b(CALCULATE_DIFFERENCE);
4414     __ bind(LOAD_LAST);
4415       // Last 4 UTF-16 characters are already pre-loaded into preloadedChunk by compare_string_16_x_LU.
4416       // No need to load it again
4417       __ mov(tmpU, preloadedChunk);
4418       __ pop(spilled_regs, sp);
4419 
           // Load the last 4 Latin1 bytes through strL and widen them to 16-bit
           // characters, as was done for the first 4.
4420       __ ldrs(vtmp, Address(strL));
4421       __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
4422       __ fmovd(tmpL, vtmp);
4423 
           // If the last chunks are equal, return with result (r0) unmodified.
4424       __ eor(rscratch2, tmpU, tmpL);
4425       __ cbz(rscratch2, DONE);
4426 
4427     // Find the first different characters in the longwords and
4428     // compute their difference.
         // On entry rscratch2 holds the non-zero XOR of the two chunks, which
         // are in tmp1 and rscratch1.
4429     __ bind(CALCULATE_DIFFERENCE);
           // rev reverses the byte order, so clz then counts from the
           // lowest-addressed byte (assuming little-endian loads); masking
           // with -16 rounds the count down to a 16-bit character boundary.
4430       __ rev(rscratch2, rscratch2);
4431       __ clz(rscratch2, rscratch2);
4432       __ andr(rscratch2, rscratch2, -16);
           // Shift the first differing character into the low 16 bits of each
           // chunk, zero-extend it and subtract.
4433       __ lsrv(tmp1, tmp1, rscratch2);
4434       __ uxthw(tmp1, tmp1);
4435       __ lsrv(rscratch1, rscratch1, rscratch2);
4436       __ uxthw(rscratch1, rscratch1);
4437       __ subw(result, tmp1, rscratch1);
4438     __ bind(DONE);
4439       __ ret(lr);
4440     return entry;
4441   }
4442 
4443   // Summary: Compare long strings intrinsic implementation for same encodings.
4444   //          Comparison is performed in lexical order.
4445   //
4446   // Prerequisites: string length >= 72 characters
4447   //
4448   // Input: result (r0): length difference
4449   //          str1 (r1): pointer to 1st string
4450   //          str2 (r3): pointer to 2nd string
4451   //          cnt1 (r2): amount of characters in 1st string
4452   //          cnt2 (r4): minimum of str1 and str2 length. Used as counter
4453   //          tmp1 (r10): starting 8 bytes of 1st string
4454   //          tmp2 (r11): starting 8 bytes of 2nd string
4455   //
4456   // Temporary registers:
4457   //          rscratch1, rscratch2
4458   //
4459   //
4460   // Output:  result - return 0 if strings are equal. Returns positive value
4461   //          if 1st string > 2nd string in lexical order. Returns
4462   //          negative value if 1st string < 2nd string.
4463   //
4464   // Side effects: str1, str2, cnt1, cnt2, tmp1, tmp2, rscratch1, rscratch2: clobbered.
4465   //
4466   // Algorithm parameters:
4467   //          isLL: true if both string are Latin1. false if both are UTF-16.
4468   //                Used to generate code for both Latin1 - Latin1 (LL) case and
4469   //                UTF-16 - UTF-16 (UU) case.
4470   // Calculated constants:
4471   //          largeLoopExitCondition: MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
4472   //                Exit condition for loop with prefetch.
4473   //          characters_in_word: isLL ? 8 : 4
4474   //                Number of characters that fit in a word (1 register)
4475   //          characters_in_dword: 2 * characters_in_word
4476   //                Number of characters that fit in a double word (2 registers)
4477   //          byte_to_char_shift: isLL ? 0 : 1
4478   //                Shift value to convert between byte and character counters
4479   //
4480   // PSEUDO CODE:
4481   //   // Code below uses <compare_string_16_bytes_same> code block, which:
4482   //   // - compares already loaded tmp1 and tmp2. goto DIFF label if it's not equal
4483   //   // - loads and compares next 8 bytes of both strings (stored in rscratch1 and cnt1). goto DIFF2 label if it's not equal
4484   //   // - loads next 8 bytes of both strings into tmp1 and tmp2
4485   //   // - on each load string pointers are updated to point at character after loaded block
4486   //
4487   //   <update cnt2 counter and str1, str2 pointers by already loaded characters>;
4488   //   if (SoftwarePrefetchHintDistance >= 0) {     // need prefetch
4489   //     do { // 64-byte loop with prefetch
4490   //       <prefetch str1 at SoftwarePrefetchHintDistance>;
4491   //       <prefetch str2 at SoftwarePrefetchHintDistance>;
4492   //       <compare_string_16_bytes_same>;
4493   //       <compare_string_16_bytes_same>;
4494   //       cnt2 = cnt2 - (8 * characters_in_word);            // update counter by amount
4495   //       <compare_string_16_bytes_same>;
4496   //       <compare_string_16_bytes_same>;
4497   //     } while(cnt2 >= largeLoopExitCondition);
4498   //   }
4499   //   if (cnt2 == 0) goto LAST_CHECK;
4500   //   while(cnt2 > characters_in_dword) {
4501   //     <compare_string_16_bytes_same>;
4502   //     cnt2 = cnt2 - characters_in_dword;
4503   //   }
4504   //   <adjust pointer to last 16 bytes for each string>;
4505   //   <preload 8 bytes before current pointers into tmp1 and tmp2)>;
4506   //   <compare_string_16_bytes_same>;
4507   //   goto LAST_CHECK;
4508   // DIFF2:
4509   //   <copy string chunks to tmp1 and tmp2 for further analysis>;
4510   //   // fallthrough to DIFF
4511   // DIFF:
4512   //   <get different characters from tmp1 and tmp2. Then calculate characters difference>
4513   //   goto DONE;
4514   // LAST_CHECK: // label to jump to when last chunk of data has to be checked.
4515   //   // Return character difference if different characters are found.
4516   //   // Return length difference (already calculated) otherwise.
4517   //   <check last loaded chunks and return result>
4518   // DONE:
4519   //   return;
4520   //
4521   //
4522   // DETAILED CODE:
4523   //  // N.B.: compare_string_16_bytes_same(...) below is not a call at
4524   //  // runtime. It is invoked at code generation time and emits its code inline.
4525   //
4526   //  cnt2 = cnt2 - characters_in_word;
4527   //  str1 = str1 + wordSize;
4528   //  str2 = str2 + wordSize;
4529   //  if (SoftwarePrefetchHintDistance >= 0) {
4530   //    LARGE_LOOP_PREFETCH:
4531   //      LOAD_PREFETCH(str1, SoftwarePrefetchHintDistance);
4532   //      LOAD_PREFETCH(str2, SoftwarePrefetchHintDistance);
4533   //      compare_string_16_bytes_same(DIFF, DIFF2);
4534   //      compare_string_16_bytes_same(DIFF, DIFF2);
4535   //      cnt2 = cnt2 - 8 * characters_in_word;
4536   //      compare_string_16_bytes_same(DIFF, DIFF2);
4537   //      rscratch2 = cnt2 - largeLoopExitCondition; // rscratch2 is not used. Use subs instead of cmp in case of potentially large constants
4538   //      bool canLoop = rscratch2 > 0;              // kept in flags
4539   //      compare_string_16_bytes_same(DIFF, DIFF2);
4540   //      if (canLoop) goto LARGE_LOOP_PREFETCH;
4541   //      if (cnt2 == 0) goto LAST_CHECK;            // no more loads left
4542   //  }
4543   //
4544   //  cnt2 = cnt2 - characters_in_dword;             // keep cnt2 counter reduced by 16 (LL) or 8 (UU)
4545   //  bool lessThan16bytesLeft = cnt2 < 0;           // kept in flags
4546   //  if (lessThan16bytesLeft) goto TAIL;
4547   //  SMALL_LOOP:                                // 16 byte loop
4548   //    compare_string_16_bytes_same(DIFF, DIFF2);
4549   //    cnt2 = cnt2 - characters_in_dword;
4550   //    bool canLoop = cnt2 >= 0;                    // kept in flags
4551   //    if (canLoop) goto SMALL_LOOP;
4552   //    bool lastCheckLeft = cnt2 == -characters_in_dword;
4553   //    if (lastCheckLeft) goto LAST_CHECK;
4554   //  TAIL: // less than 16 bytes left to load. And 8 bytes were loaded but not
4555   //    // compared. Reuse <compare_string_16_bytes_same> primitive. Handle last
4556   //    // 24 string bytes by preloading first 8 of these 24 bytes, then use
4557   //    // <compare_string_16_bytes_same>. And then compare last 8 bytes loaded
4558   //    // by <compare_string_16_bytes_same>
4559   //    // This will partially overlap with previous load and comparison, but
4560   //    // makes code more simple
4561   //    str1 = str1 + cnt2 << byte_to_char_shift;
4562   //    str2 = str2 + cnt2 << byte_to_char_shift;
4563   //    tmp1 = LOAD8BYTES(str1, -8);
4564   //    tmp2 = LOAD8BYTES(str2, -8);
4565   //    compare_string_16_bytes_same(DIFF, DIFF2);
4566   //    goto LAST_CHECK;
4567   //  DIFF2: // calculate character difference, when data stored in rscratch1 and cnt1
4568   //    // move loaded chunks to tmp1 and tmp2 registers to use in DIFF block
4569   //    tmp1 = rscratch1;
4570   //    tmp2 = cnt1;
4571   //    // fallthrough to DIFF
4572   //  DIFF: // calculate character difference, when data stored in tmp1 and tmp2
4573   //    // and find different characters. rscratch2 contains zeroes at positions with
4574   //    // same characters. Find index of first different bit (== amount of
4575   //    // trailing zeroes), which is: <different character index> * <bits in character>
4576   //    // + <some bits within characters>. Then, clearing bits within character
4577   //    // (3 lowest bits for Latin1 case and 4 lowest bits for UTF-16 case)
4578   //    // will result in the number of bits until different character in current chunks.
4579   //
4580   //    // As it's not possible to count trailing zeroes, reverse the byte order and count leading zeroes
4581   //    rscratch2 = REVERSE_BYTES(rscratch2);
4582   //    rscratch2 = COUNT_LEADING_ZEROES(rscratch2);
4583   //    rscratch2 = rscratch2 & (isLL ? -8 : -16);   // clear lowest 3 (Latin1) or 4 (UTF-16) bits
4584   //    tmp1 = tmp1 >> rscratch2;                    // shift off same characters from 1st string chunk
4585   //    tmp2 = tmp2 >> rscratch2;                    // shift off same characters from 2nd string chunk
4586   //
4587   //    // Only first character should be left for comparison. Use unsigned extend instruction for that
4588   //    if (isLL) {
4589   //      tmp1 = UNSIGNED_EXTEND_BYTE2INT(tmp1);
4590   //      tmp2 = UNSIGNED_EXTEND_BYTE2INT(tmp2);
4591   //    } else {
4592   //      tmp1 = UNSIGNED_EXTEND_SHORT2INT(tmp1);
4593   //      tmp2 = UNSIGNED_EXTEND_SHORT2INT(tmp2);
4594   //    }
4595   //
4596   //    result = tmp1 - tmp2;
4597   //    goto DONE;
4598   //  LAST_CHECK:
4599   //    rscratch2 = BIT_XOR(tmp1, tmp2);
4600   //    if (rscratch2 != 0) goto DIFF;
4601   //  DONE:
4602   //    return result;
4603 
4604   address generate_compare_long_string_same_encoding(bool isLL) {
         // Emits the stub that compares two strings of the same encoding
         // (stub name "... LL" when isLL is true, "... UU" otherwise) and
         // returns the entry address of the emitted code.
4605     const int characters_in_word = isLL ? 8 : 4;
4606     const int characters_in_dword = 2 * characters_in_word;
4607     const int byte_to_char_shift = isLL ? 0 : 1;
4608     __ align(CodeEntryAlignment);
4609     StubCodeMark mark(this, "StubRoutines", isLL
4610         ? "compare_long_string_same_encoding LL"
4611         : "compare_long_string_same_encoding UU");
4612     address entry = __ pc();
         // The register assignment must agree with compare_string_16_bytes_same,
         // which declares the same aliases and additionally uses rscratch1 and
         // cnt1 for the second pair of loaded chunks.
4613     Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
4614         tmp1 = r10, tmp2 = r11;
4615     Label SMALL_LOOP, LARGE_LOOP_PREFETCH, DIFF2, TAIL, DONE, DIFF, LAST_CHECK;


4616     // exit from large loop when less than 64 bytes left to read or we're about
4617     // to prefetch memory behind array border
4618     int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
4619     // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
4620     // update cnt2 counter with already loaded 8 bytes
4621     __ sub(cnt2, cnt2, characters_in_word);
4622     // update pointers, because of previous read
4623     __ add(str1, str1, wordSize);
4624     __ add(str2, str2, wordSize);
4625     if (SoftwarePrefetchHintDistance >= 0) {
           // The first iteration is emitted without a preceding length check.
           // NOTE(review): this relies on the documented prerequisite
           // (length >= 72 characters) - confirm against the caller.
4626       __ bind(LARGE_LOOP_PREFETCH);
4627         __ prfm(Address(str1, SoftwarePrefetchHintDistance));
4628         __ prfm(Address(str2, SoftwarePrefetchHintDistance));
             // Four expansions of the 16-byte comparison per iteration; cnt2 is
             // reduced once by the matching 8 * characters_in_word characters.
4629         compare_string_16_bytes_same(DIFF, DIFF2);
4630         compare_string_16_bytes_same(DIFF, DIFF2);
4631         __ sub(cnt2, cnt2, 8 * characters_in_word);
4632         compare_string_16_bytes_same(DIFF, DIFF2);
             // The flags set here are consumed by the br(GT) emitted after the
             // next comparison, so that code must leave the flags untouched.
4633         __ subs(rscratch2, cnt2, largeLoopExitCondition);
4634         compare_string_16_bytes_same(DIFF, DIFF2);
4635         __ br(__ GT, LARGE_LOOP_PREFETCH);
4636         __ cbz(cnt2, LAST_CHECK);                     // Check if no more chars left
4637     }
4638     __ subs(cnt2, cnt2, characters_in_dword);         // keep number of characters reduced by 16 (LL) or 8 (UU)

4639     __ br(__ LT, TAIL);
4640     __ bind(SMALL_LOOP);
4641       compare_string_16_bytes_same(DIFF, DIFF2);
4642       __ subs(cnt2, cnt2, characters_in_dword);
4643       __ br(__ GE, SMALL_LOOP);
           // cmn compares cnt2 with -characters_in_dword: equal means everything
           // has been loaded and only the last loaded chunks remain unchecked.
4644       __ cmn(cnt2, (u1)(characters_in_dword));
4645       __ br(__ EQ, LAST_CHECK);
4646     __ bind(TAIL);
           // cnt2 is negative here, so the additions move both pointers backwards.
4647       __ add(str1, str1, cnt2, __ LSL, byte_to_char_shift); // points to last 16 bytes to compare
4648       __ add(str2, str2, cnt2, __ LSL, byte_to_char_shift); // points to last 16 bytes to compare
4649       __ ldr(tmp1, Address(str1, -8));                // preload 8 bytes before current pointer
4650       __ ldr(tmp2, Address(str2, -8));                // preload 8 bytes before current pointer
4651       compare_string_16_bytes_same(DIFF, DIFF2);
4652       __ b(LAST_CHECK);















         // DIFF2: the differing chunks are in rscratch1 and cnt1; move them to
         // tmp1/tmp2, where DIFF expects them.
4653     __ bind(DIFF2);
4654       __ mov(tmp1, rscratch1);
4655       __ mov(tmp2, cnt1);
4656       // fallthrough to DIFF












         // DIFF: on entry rscratch2 holds the non-zero XOR of tmp1 and tmp2.
4657     __ bind(DIFF);
           // rev reverses the byte order, so clz then counts from the
           // lowest-addressed byte (assuming little-endian loads); the mask
           // rounds the count down to a character boundary (8 or 16 bits).
4658       __ rev(rscratch2, rscratch2);
4659       __ clz(rscratch2, rscratch2);
4660       __ andr(rscratch2, rscratch2, isLL ? -8 : -16);
4661       __ lsrv(tmp1, tmp1, rscratch2);
           // NOTE(review): both branches emit the same lsrv for tmp2; only the
           // width of the zero-extension differs (byte vs. halfword).
4662       if (isLL) {
4663         __ lsrv(tmp2, tmp2, rscratch2);
4664         __ uxtbw(tmp1, tmp1);
4665         __ uxtbw(tmp2, tmp2);
4666       } else {
4667         __ lsrv(tmp2, tmp2, rscratch2);
4668         __ uxthw(tmp1, tmp1);
4669         __ uxthw(tmp2, tmp2);
4670       }
4671       __ subw(result, tmp1, tmp2);
4672       __ b(DONE);
         // LAST_CHECK: compare the last loaded chunks; if they are equal,
         // result (r0) is returned unmodified.
4673     __ bind(LAST_CHECK);
4674       __ eor(rscratch2, tmp1, tmp2);
4675       __ cbnz(rscratch2, DIFF);
4676     __ bind(DONE);
4677       __ ret(lr);
4678     return entry;
4679   }
4680 
4681   void generate_compare_long_strings() {
           // Generates the four long-string comparison stubs and records their
           // entry addresses in StubRoutines::aarch64. The boolean argument
           // selects the variant: isLL for the same-encoding generator
           // (true = LL, false = UU), isLU for the different-encoding
           // generator (true = LU, false = UL).
4682       StubRoutines::aarch64::_compare_long_string_LL
4683           = generate_compare_long_string_same_encoding(true);
4684       StubRoutines::aarch64::_compare_long_string_UU
4685           = generate_compare_long_string_same_encoding(false);
4686       StubRoutines::aarch64::_compare_long_string_LU
4687           = generate_compare_long_string_different_encoding(true);
4688       StubRoutines::aarch64::_compare_long_string_UL
4689           = generate_compare_long_string_different_encoding(false);
4690   }
4691 
4692   // R0 = result
4693   // R1 = str2
4694   // R2 = cnt1
4695   // R3 = str1
4696   // R4 = cnt2


< prev index next >