__ generate_dsin_dcos(isCos, (address)StubRoutines::aarch64::_npio2_hw,
    (address)StubRoutines::aarch64::_two_over_pi,
    (address)StubRoutines::aarch64::_pio2,
    (address)StubRoutines::aarch64::_dsin_coef,
    (address)StubRoutines::aarch64::_dcos_coef);
  return start;
}

// Emits the "dlog" stub: all work is delegated to fast_log with a fixed
// assignment of vector (v0-v5, v16-v19) and general (r0-r4) registers.
// Returns the address of the first emitted instruction.
address generate_dlog() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "dlog");
  address entry = __ pc();
  FloatRegister vtmp0 = v0, vtmp1 = v1, vtmp2 = v2, vtmp3 = v3, vtmp4 = v4,
      vtmp5 = v5, tmpC1 = v16, tmpC2 = v17, tmpC3 = v18, tmpC4 = v19;
  Register tmp1 = r0, tmp2 = r1, tmp3 = r2, tmp4 = r3, tmp5 = r4;
  __ fast_log(vtmp0, vtmp1, vtmp2, vtmp3, vtmp4, vtmp5, tmpC1, tmpC2, tmpC3,
      tmpC4, tmp1, tmp2, tmp3, tmp4, tmp5);
  return entry;
}

// code for comparing 16 bytes of strings with same encoding
//
// Code-generation helper (not a runtime call). Expects one 8-byte chunk of
// each string already in tmp1 (r10) / tmp2 (r11).
//   - compares tmp1 with tmp2 and branches to DIFF1 on mismatch;
//   - loads the next 8 bytes of each string into rscratch1 / cnt1 (r2),
//     compares them and branches to DIFF2 on mismatch;
//   - reloads tmp1 / tmp2 with the following 8 bytes for the next round.
// str1 (r1) and str2 (r3) are post-incremented by 16 bytes in total.
// NOTE(review): the local `result` is declared but not used here.
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
  Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
  __ ldr(rscratch1, Address(__ post(str1, 8)));
  __ eor(rscratch2, tmp1, tmp2);
  __ ldr(cnt1, Address(__ post(str2, 8)));
  __ cbnz(rscratch2, DIFF1);
  __ ldr(tmp1, Address(__ post(str1, 8)));
  __ eor(rscratch2, rscratch1, cnt1);
  __ ldr(tmp2, Address(__ post(str2, 8)));
  __ cbnz(rscratch2, DIFF2);
}

// code for comparing 16 characters of strings with Latin1 and Utf16 encoding
//
// Code-generation helper (not a runtime call). In this revision tmp2 (r11)
// is the Latin1 load pointer and cnt1 (r2) is the UTF-16 load pointer; tmp3
// (r12) must already hold the next 8 bytes of the UTF-16 string and vtmpZ
// (v0) is the second zip operand (the caller clears it). Mismatches found in
// the tmp3/tmpL pair branch to DIFF2, mismatches in tmpU/tmpL to DIFF1.
// NOTE(review): the local `tmp1` is declared but not used here.
void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
    Label &DIFF2) {
  Register cnt1 = r2, tmp1 = r10, tmp2 = r11, tmp3 = r12;
  FloatRegister vtmp = v1, vtmpZ = v0, vtmp3 = v2;

  __ ldrq(vtmp, Address(__ post(tmp2, 16)));
  __ ldr(tmpU, Address(__ post(cnt1, 8)));
  __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
  // now we have 32 bytes of characters (converted to U) in vtmp:vtmp3

  __ fmovd(tmpL, vtmp3);
  __ eor(rscratch2, tmp3, tmpL);
  __ cbnz(rscratch2, DIFF2);

  __ ldr(tmp3, Address(__ post(cnt1, 8)));
  __ umov(tmpL, vtmp3, __ D, 1);
  __ eor(rscratch2, tmpU, tmpL);
  __ cbnz(rscratch2, DIFF1);

  __ zip2(vtmp, __ T16B, vtmp, vtmpZ);
  __ ldr(tmpU, Address(__ post(cnt1, 8)));
  __ fmovd(tmpL, vtmp);
  __ eor(rscratch2, tmp3, tmpL);
  __ cbnz(rscratch2, DIFF2);

  __ ldr(tmp3, Address(__ post(cnt1, 8)));
  __ umov(tmpL, vtmp, __ D, 1);
  __ eor(rscratch2, tmpU, tmpL);
  __ cbnz(rscratch2, DIFF1);
}

// r0  = result
// r1  = str1
// r2  = cnt1
// r3  = str2
// r4  = cnt2
// r10 = tmp1
// r11 = tmp2
//
// Emits the long-string compare stub for strings with different encodings.
// isLU selects which operand is the Latin1 one (strL = isLU ? str1 : str2).
// Returns the stub entry address.
// NOTE(review): the label TAIL_LOAD_16 and the local vtmp3 are declared but
// not referenced in this function.
address generate_compare_long_string_different_encoding(bool isLU) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", isLU
      ? "compare_long_string_different_encoding LU"
      : "compare_long_string_different_encoding UL");
  address entry = __ pc();
  Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
      DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, NO_PREFETCH,
      LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
  Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
      tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
  FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
  RegSet spilled_regs = RegSet::of(tmp3, tmp4);

  int prefetchLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance/2);

  __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
  // cnt2 == amount of characters left to compare
  // Check already loaded first 4 symbols(vtmp and tmp2(LU)/tmp1(UL))
  __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
  __ add(str1, str1, isLU ? wordSize/2 : wordSize);
  __ add(str2, str2, isLU ? wordSize : wordSize/2);
  __ fmovd(isLU ? tmp1 : tmp2, vtmp);
  __ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
  __ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1);
  __ eor(rscratch2, tmp1, tmp2);
  __ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0);
  __ mov(rscratch1, tmp2);
  __ cbnz(rscratch2, CALCULATE_DIFFERENCE);
  Register strU = isLU ? str2 : str1,
           strL = isLU ? str1 : str2,
           tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
           tmpL = isLU ? tmp1 : rscratch1; // where to keep L for comparison
  // tmp3/tmp4 are saved here and restored on both ways out of the loops
  // (DIFF1 and LOAD_LAST below).
  __ push(spilled_regs, sp);
  __ sub(tmp2, strL, cnt2); // strL pointer to load from
  __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from

  // pre-load the 8 UTF-16 bytes that compare_string_16_x_LU expects in tmp3
  __ ldr(tmp3, Address(__ post(cnt1, 8)));

  if (SoftwarePrefetchHintDistance >= 0) {
    __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
    __ br(__ LT, NO_PREFETCH);
    __ bind(LARGE_LOOP_PREFETCH);
    __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
    __ mov(tmp4, 2);
    __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
    __ bind(LARGE_LOOP_PREFETCH_REPEAT1);
    compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
    __ subs(tmp4, tmp4, 1);
    __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
    __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
    __ mov(tmp4, 2);
    __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
    compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
    __ subs(tmp4, tmp4, 1);
    __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
    __ sub(cnt2, cnt2, 64);
    __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
    __ br(__ GE, LARGE_LOOP_PREFETCH);
  }
  __ cbz(cnt2, LOAD_LAST); // no characters left except last load
  __ bind(NO_PREFETCH);
  __ subs(cnt2, cnt2, 16);
  __ br(__ LT, TAIL);
  __ bind(SMALL_LOOP); // smaller loop
  // the flags set by this subs are consumed by the GE branch emitted after
  // the compare block
  __ subs(cnt2, cnt2, 16);
  compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
  __ br(__ GE, SMALL_LOOP);
  __ cmn(cnt2, (u1)16);
  __ br(__ EQ, LOAD_LAST);
  __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
  __ add(cnt1, cnt1, cnt2, __ LSL, 1); // Address of 8 bytes before last 4 characters in UTF-16 string
  __ add(tmp2, tmp2, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string
  __ ldr(tmp3, Address(cnt1, -8));
  compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // last 16 characters before last load
  __ b(LOAD_LAST);
  __ bind(DIFF2);
  // the mismatching UTF-16 chunk is in tmp3; move it to where
  // CALCULATE_DIFFERENCE expects it, then fall through to DIFF1
  __ mov(tmpU, tmp3);
  __ bind(DIFF1);
  __ pop(spilled_regs, sp);
  __ b(CALCULATE_DIFFERENCE);
  __ bind(LOAD_LAST);
  // Last 4 UTF-16 characters are already pre-loaded into tmp3 by compare_string_16_x_LU.
  // No need to load it again
  __ mov(tmpU, tmp3);
  __ pop(spilled_regs, sp);

  __ ldrs(vtmp, Address(strL));
  __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
  __ fmovd(tmpL, vtmp);

  __ eor(rscratch2, tmpU, tmpL);
  __ cbz(rscratch2, DONE);

  // Find the first different characters in the longwords and
  // compute their difference.
  __ bind(CALCULATE_DIFFERENCE);
  __ rev(rscratch2, rscratch2);
  __ clz(rscratch2, rscratch2);
  __ andr(rscratch2, rscratch2, -16);
  __ lsrv(tmp1, tmp1, rscratch2);
  __ uxthw(tmp1, tmp1);
  __ lsrv(rscratch1, rscratch1, rscratch2);
  __ uxthw(rscratch1, rscratch1);
  __ subw(result, tmp1, rscratch1);
  __ bind(DONE);
  __ ret(lr);
  return entry;
}

// r0  = result
// r1  = str1
// r2  = cnt1
// r3  = str2
// r4  = cnt2
// r10 = tmp1
// r11 = tmp2
//
// Emits the long-string compare stub for two strings with the same encoding.
// isLL selects the element size: byte-sized characters when true, 2-byte
// characters otherwise (see the isLL ? ... : ... constants below).
// Returns the stub entry address.
// NOTE(review): the labels DIFF_LAST_POSITION and DIFF_LAST_POSITION2 are
// declared but not referenced in this function.
address generate_compare_long_string_same_encoding(bool isLL) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", isLL
      ? "compare_long_string_same_encoding LL"
      : "compare_long_string_same_encoding UU");
  address entry = __ pc();
  Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
      tmp1 = r10, tmp2 = r11;
  Label SMALL_LOOP, LARGE_LOOP_PREFETCH, CHECK_LAST, DIFF2, TAIL,
      LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF,
      DIFF_LAST_POSITION, DIFF_LAST_POSITION2;
  // exit from large loop when less than 64 bytes left to read or we're about
  // to prefetch memory behind array border
  int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
  // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
  // update cnt2 counter with already loaded 8 bytes
  __ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
  // update pointers, because of previous read
  __ add(str1, str1, wordSize);
  __ add(str2, str2, wordSize);
  if (SoftwarePrefetchHintDistance >= 0) {
    __ bind(LARGE_LOOP_PREFETCH);
    __ prfm(Address(str1, SoftwarePrefetchHintDistance));
    __ prfm(Address(str2, SoftwarePrefetchHintDistance));
    compare_string_16_bytes_same(DIFF, DIFF2);
    compare_string_16_bytes_same(DIFF, DIFF2);
    __ sub(cnt2, cnt2, isLL ? 64 : 32);
    compare_string_16_bytes_same(DIFF, DIFF2);
    // flags set here are consumed by the GT branch after the last compare block
    __ subs(rscratch2, cnt2, largeLoopExitCondition);
    compare_string_16_bytes_same(DIFF, DIFF2);
    __ br(__ GT, LARGE_LOOP_PREFETCH);
    __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
  }
  // less than 16 bytes left?
  __ subs(cnt2, cnt2, isLL ? 16 : 8);
  __ br(__ LT, TAIL);
  __ bind(SMALL_LOOP);
  compare_string_16_bytes_same(DIFF, DIFF2);
  __ subs(cnt2, cnt2, isLL ? 16 : 8);
  __ br(__ GE, SMALL_LOOP);
  __ bind(TAIL);
  __ adds(cnt2, cnt2, isLL ? 16 : 8);
  __ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF);
  __ subs(cnt2, cnt2, isLL ? 8 : 4);
  __ br(__ LE, CHECK_LAST);
  __ eor(rscratch2, tmp1, tmp2);
  __ cbnz(rscratch2, DIFF);
  __ ldr(tmp1, Address(__ post(str1, 8)));
  __ ldr(tmp2, Address(__ post(str2, 8)));
  __ sub(cnt2, cnt2, isLL ? 8 : 4);
  __ bind(CHECK_LAST);
  if (!isLL) {
    __ add(cnt2, cnt2, cnt2); // now in bytes
  }
  __ eor(rscratch2, tmp1, tmp2);
  __ cbnz(rscratch2, DIFF);
  // final 8-byte load of each string, addressed relative to str1/str2 by cnt2
  __ ldr(rscratch1, Address(str1, cnt2));
  __ ldr(cnt1, Address(str2, cnt2));
  __ eor(rscratch2, rscratch1, cnt1);
  __ cbz(rscratch2, LENGTH_DIFF);
  // Find the first different characters in the longwords and
  // compute their difference.
  // DIFF2: the mismatching chunks are in rscratch1 / cnt1.
  __ bind(DIFF2);
  __ rev(rscratch2, rscratch2);
  __ clz(rscratch2, rscratch2);
  __ andr(rscratch2, rscratch2, isLL ? -8 : -16);
  __ lsrv(rscratch1, rscratch1, rscratch2);
  if (isLL) {
    __ lsrv(cnt1, cnt1, rscratch2);
    __ uxtbw(rscratch1, rscratch1);
    __ uxtbw(cnt1, cnt1);
  } else {
    __ lsrv(cnt1, cnt1, rscratch2);
    __ uxthw(rscratch1, rscratch1);
    __ uxthw(cnt1, cnt1);
  }
  __ subw(result, rscratch1, cnt1);
  __ b(LENGTH_DIFF);
  // DIFF: same computation, but the mismatching chunks are in tmp1 / tmp2.
  __ bind(DIFF);
  __ rev(rscratch2, rscratch2);
  __ clz(rscratch2, rscratch2);
  __ andr(rscratch2, rscratch2, isLL ? -8 : -16);
  __ lsrv(tmp1, tmp1, rscratch2);
  if (isLL) {
    __ lsrv(tmp2, tmp2, rscratch2);
    __ uxtbw(tmp1, tmp1);
    __ uxtbw(tmp2, tmp2);
  } else {
    __ lsrv(tmp2, tmp2, rscratch2);
    __ uxthw(tmp1, tmp1);
    __ uxthw(tmp2, tmp2);
  }
  __ subw(result, tmp1, tmp2);
  __ b(LENGTH_DIFF);
  __ bind(LAST_CHECK_AND_LENGTH_DIFF);
  __ eor(rscratch2, tmp1, tmp2);
  __ cbnz(rscratch2, DIFF);
  // result is left untouched on this path
  __ bind(LENGTH_DIFF);
  __ ret(lr);
  return entry;
}

// Generates the four long-string compare stubs (LL, UU, LU, UL) and stores
// their entry addresses in the StubRoutines::aarch64 table.
void generate_compare_long_strings() {
  StubRoutines::aarch64::_compare_long_string_LL
      = generate_compare_long_string_same_encoding(true);
  StubRoutines::aarch64::_compare_long_string_UU
      = generate_compare_long_string_same_encoding(false);
  StubRoutines::aarch64::_compare_long_string_LU
      = generate_compare_long_string_different_encoding(true);
  StubRoutines::aarch64::_compare_long_string_UL
      = generate_compare_long_string_different_encoding(false);
}

// R0 = result
// R1 = str2
// R2 = cnt1
// R3 = str1
// R4 = cnt2 |
  __ generate_dsin_dcos(isCos, (address)StubRoutines::aarch64::_npio2_hw,
      (address)StubRoutines::aarch64::_two_over_pi,
      (address)StubRoutines::aarch64::_pio2,
      (address)StubRoutines::aarch64::_dsin_coef,
      (address)StubRoutines::aarch64::_dcos_coef);
  return start;
}

// Emits the "dlog" stub: all work is delegated to fast_log with a fixed
// assignment of vector (v0-v5, v16-v19) and general (r0-r4) registers.
// Returns the address of the first emitted instruction.
address generate_dlog() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "dlog");
  address entry = __ pc();
  FloatRegister vtmp0 = v0, vtmp1 = v1, vtmp2 = v2, vtmp3 = v3, vtmp4 = v4,
      vtmp5 = v5, tmpC1 = v16, tmpC2 = v17, tmpC3 = v18, tmpC4 = v19;
  Register tmp1 = r0, tmp2 = r1, tmp3 = r2, tmp4 = r3, tmp5 = r4;
  __ fast_log(vtmp0, vtmp1, vtmp2, vtmp3, vtmp4, vtmp5, tmpC1, tmpC2, tmpC3,
      tmpC4, tmp1, tmp2, tmp3, tmp4, tmp5);
  return entry;
}

// Summary: part of string compareTo implementation.
// Called for code generation in multiple points.
//    1) load 8 bytes and advance pointers of both strings and compare
//       previously loaded 8 bytes. jump to DIFF1 if different characters found
//    2) load 8 bytes and advance pointers of both strings and compare
//       previously loaded 8 bytes. jump to DIFF2 if different characters found
//
// Input:
//    str1 (r1): pointer for next load from 1st string
//    cnt1 (r2): register to use for loading data from 2nd string
//    str2 (r3): pointer for next load from 2nd string
//    tmp1 (r10): already loaded 8 bytes of 1st string.
//    tmp2 (r11): already loaded 8 bytes of 2nd string.
//
// Output:
//    rscratch2: result of last comparison
//    tmp1, tmp2: contain the differing parts of the 1st and 2nd strings on exit
//                via the DIFF1 label. On normal exit they hold the next 8 bytes
//                of each string, loaded but not yet compared.
//    rscratch1, cnt1: contain the differing parts of the 1st and 2nd strings on
//                exit via the DIFF2 label. Not used for normal and DIFF1 exits
//
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
  Register str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
  // Step 1: fetch the next 8 bytes of each string into rscratch1 / cnt1
  // while comparing the pair that is already in tmp1 / tmp2.
  __ ldr(rscratch1, Address(__ post(str1, 8)));
  __ eor(rscratch2, tmp1, tmp2);
  __ ldr(cnt1, Address(__ post(str2, 8)));
  __ cbnz(rscratch2, DIFF1);
  // Step 2: refill tmp1 / tmp2 with the following 8 bytes while comparing
  // the pair that was just loaded into rscratch1 / cnt1.
  __ ldr(tmp1, Address(__ post(str1, 8)));
  __ eor(rscratch2, rscratch1, cnt1);
  __ ldr(tmp2, Address(__ post(str2, 8)));
  __ cbnz(rscratch2, DIFF2);
}

// Summary: part of string compare implementation. Called for code generation in multiple points.
//    - expecting 4 UTF-16 string characters preloaded into tmp3
//    - load 16 characters from each string
//    - convert Latin1 characters to UTF-16
//    - compare preloaded 4 characters with 4 first converted Latin1 characters
//    - compare next 12 loaded and converted characters from each string
//    - compared data is in tmpU and tmpL registers or in tmp3 and tmpL
//    - in case different characters are found while comparing tmpU and
//      tmpL, jumps to DIFF1. Jumps to DIFF2 in case different character
//      was found while comparing tmp3 and tmpL
//    - string pointers are increased by amount of loaded bytes
//
// Input:
//    strUnext (r2): pointer for next load from UTF-16 string
//    strLnext (r11): pointer for next load from Latin1 string
//    tmp3 (r12): used to store parts of UTF-16 string
//    vtmpZ (v0): zeroed register for conversion from Latin1 to UTF-16
//
// Temporary registers:
//    vtmp (v1): used to load 16 Latin1 characters and part of converted Latin1 string
//    vtmp3 (v2): used for part of converted Latin1 string
//
// Output: rscratch2: result of last comparison
//    tmpL: last compared part of converted Latin1 string
//    tmpU: in case of exit via DIFF1 or normal exit: contains last compared part of UTF-16 string.
//          in case of exit via DIFF2: contains the UTF-16 part loaded after the one in tmp3 (not compared yet).
//    tmp3: in case of exit via DIFF2: contains last compared part of UTF-16 string.
//          in case of exit via DIFF1: contains the UTF-16 part loaded after the one in tmpU (not compared yet).
//          in case of normal exit: contains preloaded 8 bytes of UTF-16 string for next comparisons
//
// Parameters:
//    tmpL: holds parts of converted Latin1 string
//    tmpU: holds parts of UTF-16 string
//    DIFF1: label to jump to if different characters are found in tmpU and tmpL
//    DIFF2: label to jump to if different characters are found in tmp3 and tmpL
//
// NOTE(review): the local `tmp1` below is declared but not used in this helper.
void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
    Label &DIFF2) {
  Register strUnext = r2, tmp1 = r10, strLnext = r11, tmp3 = r12;
  FloatRegister vtmp = v1, vtmpZ = v0, vtmp3 = v2;

  // 16 Latin1 bytes in one vector load; the UTF-16 side is read in 8-byte
  // pieces that alternate between tmpU and tmp3.
  __ ldrq(vtmp, Address(__ post(strLnext, 16)));
  __ ldr(tmpU, Address(__ post(strUnext, 8)));
  // zip with the zeroed vtmpZ is how Latin1 is widened to UTF-16 here
  __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
  // now we have 32 bytes of characters (converted to U) in vtmp:vtmp3
  // (the part that stays in vtmp is widened by the zip2 further down)

  // characters 0..3: preloaded tmp3 against the low half of vtmp3
  __ fmovd(tmpL, vtmp3);
  __ eor(rscratch2, tmp3, tmpL);
  __ cbnz(rscratch2, DIFF2);

  // characters 4..7: tmpU against the high half of vtmp3
  __ ldr(tmp3, Address(__ post(strUnext, 8)));
  __ umov(tmpL, vtmp3, __ D, 1);
  __ eor(rscratch2, tmpU, tmpL);
  __ cbnz(rscratch2, DIFF1);

  // characters 8..11: tmp3 against the low half of the zip2 result
  __ zip2(vtmp, __ T16B, vtmp, vtmpZ);
  __ ldr(tmpU, Address(__ post(strUnext, 8)));
  __ fmovd(tmpL, vtmp);
  __ eor(rscratch2, tmp3, tmpL);
  __ cbnz(rscratch2, DIFF2);

  // characters 12..15: tmpU against the high half; the load below leaves the
  // next UTF-16 chunk in tmp3 for the following invocation
  __ ldr(tmp3, Address(__ post(strUnext, 8)));
  __ umov(tmpL, vtmp, __ D, 1);
  __ eor(rscratch2, tmpU, tmpL);
  __ cbnz(rscratch2, DIFF1);
}

// Summary: Compare long strings intrinsic implementation for different encodings.
// Comparison is performed in lexical order.
//
// Prerequisites: string length >= 72 characters
//
// Input: result (r0): length difference
//        str1 (r1): pointer to 1st string
//        cnt1 (r2): number of characters in 1st string
//        str2 (r3): pointer to 2nd string
//        cnt2 (r4): minimum of str1 and str2 length.
Used as counter 4136 // tmp1 (r10): starting 8 bytes of 1st string for UTF-16 string 4137 // tmp2 (r11): starting 8 bytes of 2nd string for UTF-16 string 4138 // vtmpZ (v0): used to convert encodings by providing zero values 4139 // vtmp (v1): starting bytes of Latin1 string. Also used as temporary register 4140 // vtmp3 (v2): temporary register 4141 // 4142 // Temporary registers: 4143 // rscratch1, rscratch2: clobbered on exit 4144 // preloadedChunk (r12), smallLoopCounter (r14): pushed on stack, then restored on exit 4145 // 4146 // 4147 // Output: result - return 0 if strings are equal. Returns positive value 4148 // if 1st string > 2nd string in lexical order. Return 4149 // negative value if 1st string < 2nd string. 4150 // 4151 // Side effects: str1, str2, cnt1, cnt2, tmp1, tmp2, rscratch1, rscratch2: clobbered. 4152 // 4153 // Algorithm parameters: 4154 // isLU: true if 1st string is Latin1. 4155 // 4156 // Calculated constants: 4157 // largeLoopExitCondition: Exit condition for loop with prefetch. 4158 // 4159 // 4160 // PSEUDO CODE: 4161 // // Code below uses <compare_string_16_x_LU> code block which: 4162 // // - loads 16 Latin1 characters at once. 
//   //     Then converts them to UTF-16 and moves them to a GPR
//   // - issues 4 smaller loads of 4 UTF-16 characters and for each load compares it with converted Latin1 characters
//   // - smaller loads are using 2 different registers to break register dependencies
//   // - jumps to DIFF1 or DIFF2 label depending on which register has a character different from converted Latin1 character
//
//   <push preloadedChunk and smallLoopCounter on stack>;
//   <convert already loaded Latin1 characters to UTF-16 and compare it>;
//   <advance string pointers by the number of loaded bytes>;
//   <calculate strUnext and strLnext == pointers to load next chunks from UTF-16 and Latin1 strings>;
//   <preload first 4 UTF characters>;
//   cnt2 = cnt2 - 4; // keep characters counter reduced by 4, because last 4 characters are compared separately
//   if (SoftwarePrefetchHintDistance >= 0) { // need prefetch
//     if (cnt2 < largeLoopExitCondition) goto NO_PREFETCH; // don't use loop with prefetch in case prefetch distance is too far away
//     do { // 64-characters loop with prefetch.
//       // Each iteration has 2 prefetch instructions for UTF-16 string and 1 for Latin1 string
//       // contains 2-iterations loops (16 characters each) between prefetch instructions
//       // to avoid huge code generation
//       <prefetch strLnext at SoftwarePrefetchHintDistance>;
//       <prefetch strUnext at SoftwarePrefetchHintDistance>;
//       for (smallLoopCounter = 0; smallLoopCounter < 2; smallLoopCounter++) {
//         <compare_string_16_x_LU>;
//       }
//       <prefetch strUnext at SoftwarePrefetchHintDistance>;
//       for (smallLoopCounter = 0; smallLoopCounter < 2; smallLoopCounter++) {
//         <compare_string_16_x_LU>;
//       }
//       cnt2 = cnt2 - 64; // update counter by the number of loaded characters
//     } while(cnt2 >= largeLoopExitCondition);
//   }
//   if (cnt2 == 0) goto LOAD_LAST; // load and compare last 4 characters
// NO_PREFETCH:
//   if (<less than 16 characters left to load>) goto TAIL;
//
//   // smaller by-16-characters loop
//   do {
//     cnt2 = cnt2 - 16;
//     <compare_string_16_x_LU>;
//   } while(<has at least 16 characters to load>);
//
//   if (cnt2 == 0) goto LOAD_LAST;
// TAIL:
//   <adjust pointers to point to 16 characters before last load>;
//   <preload first 4 UTF characters>
//   <load and compare 16 characters before last load>;
//   goto LOAD_LAST;
// DIFF2:
//   <move UTF-16 string data to the same comparison register as for DIFF1>;
//   // fallthrough
// DIFF1:
//   <pop preloadedChunk and smallLoopCounter from stack>;
//   goto CALCULATE_DIFFERENCE;
// LOAD_LAST:
//   <pop preloadedChunk and smallLoopCounter from stack>;
//   <load and compare last 4 characters>;
//   if (<difference not found>) return; // result = already calculated length difference
// CALCULATE_DIFFERENCE:
//   result = <calculate character difference>;
// DONE:
//   return;
//
//
//
//
//
// DETAILED
// CODE:
//   vtmpZ = 0; // used to convert encodings
//   vtmp = CONVERT_LATIN1_TO_UTF16(vtmp, vtmpZ); // implemented as zip1 instruction
//
//   // update string pointers by the number of loaded bytes
//   str1 = str1 + (isLU ? wordSize/2 : wordSize);
//   str2 = str2 + (isLU ? wordSize : wordSize/2);
//
//   // copy converted string into GPR
//   if (isLU) tmp1 = vtmp;
//   else tmp2 = vtmp;
//
//   cnt2 = cnt2 - 8; // reduce cnt2 by the number of already loaded characters. And reduce by 4 more characters
//   str1 = str1 + (cnt2 << (isLU ? 0 : 1)); // address of 1st string last 4 characters
//   rscratch2 = BIT_XOR(tmp1, tmp2); // begin loaded chunks comparison
//   str2 = str2 + (cnt2 << (isLU ? 1 : 0)); // address of 2nd string last 4 characters
//   rscratch1 = tmp2; // copy 2nd string chunk
//   if (rscratch2 != 0) goto CALCULATE_DIFFERENCE; // found different character
//
//   // several redefinitions below to have meaningful names
//   void* strU = isLU ? str2 : str1; // UTF-16 string pointer to last 4 characters
//   void* strL = isLU ? str1 : str2; // Latin1 string pointer to last 4 characters
//   long tmpU = isLU ? rscratch1 : tmp1; // UTF-16 characters holder
//   long tmpL = isLU ?
tmp1 : rscratch1; // Latin1 characters holder 4250 // void* strLnext = tmp2; // Latin1 string pointer to load next character(s) 4251 // void* strUnext = cnt1; // UTF-16 string pointer to load next character(s) 4252 // 4253 // PUSH_ON_STACK(preloadedChunk, smallLoopCounter); 4254 // strLnext = strL - cnt2; // initialize pointer to Latin1 string next load 4255 // strUnext = strU - cnt2 << 1; // initialize pointer to UTF-16 string next load 4256 // 4257 // preloadedChunk = LOAD8BYTES(strUnext, 8); // pre-load next 8 bytes of UTF-16 string 4258 // strUnext = strUnext + 8; // merged with load above as post-increment 4259 // 4260 // if (SoftwarePrefetchHintDistance >= 0) { 4261 // rscratch2 = cnt2 - prefetchLoopExitCondition; 4262 // if (rscratch2 < 0) goto NO_PREFETCH; 4263 // LARGE_LOOP_PREFETCH: // 64-characters loop 4264 // PREFETCH(strLnext, SoftwarePrefetchHintDistance); 4265 // smallLoopCounter = 2; // initialize inner loop counter 4266 // PREFETCH(strUnext, SoftwarePrefetchHintDistance); 4267 // LARGE_LOOP_PREFETCH_REPEAT1: { // 16 characters inner loop with 2 iterations 4268 // compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // see compare_string_16_x_LU comments 4269 // smallLoopCounter--; 4270 // bool smallLoopRepeat = (smallLoopCounter > 0); // kept in flags 4271 // if (smallLoopRepeat) goto LARGE_LOOP_PREFETCH_REPEAT1; 4272 // } 4273 // PREFETCH(strUnext, SoftwarePrefetchHintDistance); 4274 // smallLoopCounter = 2; // initialize inner loop counter 4275 // LARGE_LOOP_PREFETCH_REPEAT2: { // one more 16 characters inner loop with 2 iterations 4276 // compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // see compare_string_16_x_LU comments 4277 // smallLoopCounter--; 4278 // bool smallLoopRepeat = (smallLoopCounter > 0); // kept in flags 4279 // if (smallLoopRepeat) goto LARGE_LOOP_PREFETCH_REPEAT2; 4280 // } 4281 // cnt2 = cnt2 - 64; 4282 // rscratch2 = cnt2 - prefetchLoopExitCondition; 4283 // if (rscratch2 >= 0) goto LARGE_LOOP_PREFETCH; 4284 // } // end of 
64-characters loop 4285 // 4286 // if (cnt2 == 0) goto LOAD_LAST; // no more characters left except last 4 characters reserved earlier 4287 // NO_PREFETCH: // all further loads doesn't require prefetch instruction 4288 // cnt2 = cnt2 - 16; // keep cnt2 counter reduced by 16 4289 // if (cnt2 < 0) goto TAIL; // less than 16 characters left to load until last 4 reserved characters 4290 // SMALL_LOOP: // 16-characters loop 4291 // compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); 4292 // cnt2 = cnt2 - 16; // decrement counter by previously loaded 16 characters 4293 // bool repeatSmallLoop = cnt2 >= 0; // kept in flags 4294 // if (repeatSmallLoop) goto SMALL_LOOP; 4295 // if (cnt2 == -16) goto LOAD_LAST; 4296 // TAIL: 4297 // strUnext = strUnext + cnt2 << 1; // pointer to UTF-16 last 16 characters + 8 bytes 4298 // strLnext = strLnext + cnt2; // pointer to Latin1 last 16 characters 4299 // preloadedChunk = LOAD8BYTES(strUnext, -8); 4300 // compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); 4301 // goto LOAD_LAST; 4302 // DIFF2: 4303 // tmpU = preloadedChunk; 4304 // DIFF1: 4305 // pop(preloadedChunk, smallLoopCounter); 4306 // goto CALCULATE_DIFFERENCE; 4307 // LOAD_LAST: 4308 // tmpU = preloadedChunk; // already loaded last 4 UTF-16 characters. Just copy to required register 4309 // pop(preloadedChunk, smallLoopCounter); 4310 // vtmp = LOAD4BYTES(strL); 4311 // vtmp = CONVERT_LATIN1_TO_UTF16(vtmp, vtmpZ); 4312 // tmpL = vtmp; 4313 // rscratch2 = BIT_XOR(tmpU, tmpL); 4314 // if (rscratch2 == 0) goto DONE; 4315 // CALCULATE_DIFFERENCE: 4316 // // No count trailing zeroes instruction is available. Reverse bits and count leading zeroes instead. 
//   rscratch2 = REVERSE_BITS(rscratch2);
//   rscratch2 = COUNT_LEADING_ZEROES(rscratch2);
//   rscratch2 = rscratch2 & -16; // clear lowest 4 bits to have number of bits until different character
//   tmp1 = tmp1 >> rscratch2; // shift off same symbols from 1st string data
//   tmp1 = UNSIGNED_EXTEND_SHORT2INT(tmp1); // only first different symbol remains in 1st string data
//   rscratch1 = rscratch1 >> rscratch2; // shift off same symbols from 2nd string data
//   rscratch1 = UNSIGNED_EXTEND_SHORT2INT(rscratch1); // only first different symbol remains in 2nd string data
//   result = tmp1 - rscratch1; // character difference
// DONE:
//   return;
//
// Emits the stub described above and returns its entry address.
// NOTE(review): the label TAIL_LOAD_16 and the local vtmp3 are declared but
// not referenced in this function.
address generate_compare_long_string_different_encoding(bool isLU) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", isLU
      ? "compare_long_string_different_encoding LU"
      : "compare_long_string_different_encoding UL");
  address entry = __ pc();
  Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
      DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, NO_PREFETCH,
      LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
  Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
      tmp1 = r10, tmp2 = r11, preloadedChunk = r12, smallLoopCounter = r14;
  FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
  RegSet spilled_regs = RegSet::of(preloadedChunk, smallLoopCounter);

  int prefetchLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance/2);

  // vtmpZ = 0; it is the zero operand of every zip used for widening below
  __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
  // cnt2 == amount of characters left to compare
  // Check already loaded first 4 symbols(vtmp and tmp2(LU)/tmp1(UL))
  __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
  __ add(str1, str1, isLU ? wordSize/2 : wordSize);
  __ add(str2, str2, isLU ? wordSize : wordSize/2);
  __ fmovd(isLU ? tmp1 : tmp2, vtmp);
  __ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
  __ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1);
  __ eor(rscratch2, tmp1, tmp2);
  __ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0);
  // CALCULATE_DIFFERENCE reads its operands from tmp1 and rscratch1
  __ mov(rscratch1, tmp2);
  __ cbnz(rscratch2, CALCULATE_DIFFERENCE);
  Register strU = isLU ? str2 : str1,
           strL = isLU ? str1 : str2,
           tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
           tmpL = isLU ? tmp1 : rscratch1, // where to keep L for comparison
           strLnext = tmp2,
           strUnext = cnt1;
  // preloadedChunk/smallLoopCounter are saved here and restored on both ways
  // out of the loops (DIFF1 and LOAD_LAST below)
  __ push(spilled_regs, sp);
  __ sub(strLnext, strL, cnt2); // strL pointer to load from
  __ sub(strUnext, strU, cnt2, __ LSL, 1); // strU pointer to load from

  // safe to read ahead 4 characters, because string length >= 72 characters
  __ ldr(preloadedChunk, Address(__ post(strUnext, 8)));

  if (SoftwarePrefetchHintDistance >= 0) {
    __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
    __ br(__ LT, NO_PREFETCH);
    __ bind(LARGE_LOOP_PREFETCH);
    // one pass = 4 compare blocks of 16 characters (2 + 2), cnt2 -= 64
    __ prfm(Address(strLnext, SoftwarePrefetchHintDistance));
    __ mov(smallLoopCounter, 2);
    __ prfm(Address(strUnext, SoftwarePrefetchHintDistance));
    __ bind(LARGE_LOOP_PREFETCH_REPEAT1);
    compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
    __ subs(smallLoopCounter, smallLoopCounter, 1);
    __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
    __ prfm(Address(strUnext, SoftwarePrefetchHintDistance));
    __ mov(smallLoopCounter, 2);
    __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
    compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
    __ subs(smallLoopCounter, smallLoopCounter, 1);
    __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
    __ sub(cnt2, cnt2, 64);
    __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
    __ br(__ GE, LARGE_LOOP_PREFETCH);
  }
  __ cbz(cnt2, LOAD_LAST); // no characters left except last load
  __ bind(NO_PREFETCH);
  // Load and compare cnt2 characters using 16 characters loop with
  // compare_string_16_x_LU primitive. In case 1..15 characters left:
  // use same compare_string_16_x_LU primitive with partial overlapping
  __ subs(cnt2, cnt2, 16);
  __ br(__ LT, TAIL);
  __ bind(SMALL_LOOP); // smaller loop
  // the flags set by this subs are consumed by the GE branch emitted after
  // the compare block
  __ subs(cnt2, cnt2, 16);
  compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
  __ br(__ GE, SMALL_LOOP);
  __ cmn(cnt2, (u1)16); // cnt2 == -16: nothing left before the last 4 characters
  __ br(__ EQ, LOAD_LAST);
  __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
  __ add(strUnext, strUnext, cnt2, __ LSL, 1); // Address of 8 bytes before last 4 characters in UTF-16 string
  __ add(strLnext, strLnext, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string
  __ ldr(preloadedChunk, Address(strUnext, -8));
  compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // last 16 characters before last load
  __ b(LOAD_LAST);
  __ bind(DIFF2);
  // the mismatching UTF-16 chunk is in preloadedChunk; move it to where
  // CALCULATE_DIFFERENCE expects it, then fall through to DIFF1
  __ mov(tmpU, preloadedChunk);
  __ bind(DIFF1);
  __ pop(spilled_regs, sp);
  __ b(CALCULATE_DIFFERENCE);
  __ bind(LOAD_LAST);
  // Last 4 UTF-16 characters are already pre-loaded into preloadedChunk by compare_string_16_x_LU.
  // No need to load it again
  __ mov(tmpU, preloadedChunk);
  __ pop(spilled_regs, sp);

  // last 4 Latin1 characters: load, widen to UTF-16, move to tmpL
  __ ldrs(vtmp, Address(strL));
  __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
  __ fmovd(tmpL, vtmp);

  __ eor(rscratch2, tmpU, tmpL);
  __ cbz(rscratch2, DONE); // equal: result is left untouched

  // Find the first different characters in the longwords and
  // compute their difference.
  // NOTE(review): the pseudo-code above says REVERSE_BITS, but the instruction
  // emitted here is `rev`; presumably that is a byte reversal on AArch64 (bit
  // reversal being `rbit`) - confirm and align the wording.
  __ bind(CALCULATE_DIFFERENCE);
  __ rev(rscratch2, rscratch2);
  __ clz(rscratch2, rscratch2);
  __ andr(rscratch2, rscratch2, -16);
  __ lsrv(tmp1, tmp1, rscratch2);
  __ uxthw(tmp1, tmp1);
  __ lsrv(rscratch1, rscratch1, rscratch2);
  __ uxthw(rscratch1, rscratch1);
  __ subw(result, tmp1, rscratch1);
  __ bind(DONE);
  __ ret(lr);
  return entry;
}

// Summary: Compare long strings intrinsic implementation for same encodings.
//          Comparison is performed in lexical order.
//
// Prerequisites: string length >= 72 characters
//
// Input:  result (r0): length difference
//         str1 (r1): pointer to 1st string
//         str2 (r3): pointer to 2nd string
//         cnt1 (r2): amount of characters in 1st string
//         cnt2 (r4): minimum of str1 and str2 length. Used as counter
//         tmp1 (r10): starting 8 bytes of 1st string
//         tmp2 (r11): starting 8 bytes of 2nd string
//
// Temporary registers:
//         rscratch1, rscratch2
//
//
// Output: result - return 0 if strings are equal. Returns positive value
//         if 1st string > 2nd string in lexical order. Returns
//         negative value if 1st string < 2nd string.
//         If no differing character is found, result is left untouched, i.e.
//         it keeps the length difference passed in.
//
// Side effects: str1, str2, cnt1, cnt2, tmp1, tmp2, rscratch1, rscratch2: clobbered.
//
// Algorithm parameters:
//   isLL: true if both strings are Latin1. false if both are UTF-16.
//         Used to generate code for both Latin1 - Latin1 (LL) case and
//         UTF-16 - UTF-16 (UU) case.
// Calculated constants:
//   largeLoopExitCondition: MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
//         Exit condition for loop with prefetch.
//   characters_in_word: isLL ? 8 : 4
//         Number of characters that fit in a word (1 register)
//   characters_in_dword: 2 * characters_in_word
//         Number of characters that fit in a double word (2 registers)
//   byte_to_char_shift: isLL ? 0 : 1
//         Shift value to convert between byte and character counters
//
// PSEUDO CODE:
//   // Code below uses <compare_string_16_bytes_same> code block, which:
//   // - compares already loaded tmp1 and tmp2. goto DIFF label if it's not equal
//   // - loads and compares next 8 bytes of both strings (stored in rscratch1 and cnt1).
//   //   goto DIFF2 label if it's not equal
//   // - loads next 8 bytes of both strings into tmp1 and tmp2
//   // - on each load string pointers are updated to point at character after loaded block
//
//   <update cnt2 counter and str1, str2 pointers by already loaded characters>;
//   if (SoftwarePrefetchHintDistance >= 0) { // need prefetch
//     do { // 64-byte loop with prefetch
//       <prefetch str1 at SoftwarePrefetchHintDistance>;
//       <prefetch str2 at SoftwarePrefetchHintDistance>;
//       <compare_string_16_bytes_same>;
//       <compare_string_16_bytes_same>;
//       cnt2 = cnt2 - (8 * characters_in_word); // update counter by the amount of characters in 64 bytes
//       <compare_string_16_bytes_same>;
//       <compare_string_16_bytes_same>;
//     } while(cnt2 > largeLoopExitCondition);
//   }
//   if (cnt2 == 0) goto LAST_CHECK;
//   while(cnt2 >= characters_in_dword) {
//     <compare_string_16_bytes_same>;
//     cnt2 = cnt2 - characters_in_dword;
//   }
//   if (cnt2 == 0) goto LAST_CHECK;
//   <adjust pointer to last 16 bytes for each string>;
//   <preload 8 bytes before current pointers into tmp1 and tmp2>;
//   <compare_string_16_bytes_same>;
//   goto LAST_CHECK;
// DIFF2:
//   <copy string chunks to tmp1 and tmp2 for further analysis>;
//   // fallthrough to DIFF
// DIFF:
//   <get different characters from tmp1 and tmp2. Then calculate characters difference>
//   goto DONE;
// LAST_CHECK: // label to jump to when last chunk of data has to be checked.
//   // Return character difference if different characters are found.
//   // Return length difference (already calculated) otherwise.
//   <check last loaded chunks and return result>
// DONE:
//   return;
//
//
// DETAILED CODE:
//   // N.B.: compare_string_16_bytes_same(...) below is not an actual call
//   // at runtime. It is invoked at code generation time.
//
//   cnt2 = cnt2 - characters_in_word;
//   str1 = str1 + wordSize;
//   str2 = str2 + wordSize;
//   if (SoftwarePrefetchHintDistance >= 0) {
//   LARGE_LOOP_PREFETCH:
//     LOAD_PREFETCH(str1, SoftwarePrefetchHintDistance);
//     LOAD_PREFETCH(str2, SoftwarePrefetchHintDistance);
//     compare_string_16_bytes_same(DIFF, DIFF2);
//     compare_string_16_bytes_same(DIFF, DIFF2);
//     cnt2 = cnt2 - 8 * characters_in_word;
//     compare_string_16_bytes_same(DIFF, DIFF2);
//     rscratch2 = cnt2 - largeLoopExitCondition; // value in rscratch2 is not used. Use subs instead of cmp in case of potentially large constants
//     bool canLoop = rscratch2 > 0; // kept in flags
//     compare_string_16_bytes_same(DIFF, DIFF2);
//     if (canLoop) goto LARGE_LOOP_PREFETCH;
//     if (cnt2 == 0) goto LAST_CHECK; // no more loads left
//   }
//
//   cnt2 = cnt2 - characters_in_dword; // keep cnt2 counter reduced by 16 (LL) or 8 (UU)
//   bool lessThan16bytesLeft = cnt2 < 0; // kept in flags
//   if (lessThan16bytesLeft) goto TAIL;
// SMALL_LOOP: // 16 byte loop
//   compare_string_16_bytes_same(DIFF, DIFF2);
//   cnt2 = cnt2 - characters_in_dword;
//   bool canLoop = cnt2 >= 0; // kept in flags; cnt2 is already reduced by characters_in_dword
//   if (canLoop) goto SMALL_LOOP;
//   bool lastCheckLeft = cnt2 == -characters_in_dword;
//   if (lastCheckLeft) goto LAST_CHECK;
// TAIL: // less than 16 bytes left to load. And 8 bytes were loaded but not
//   // compared. Reuse <compare_string_16_bytes_same> primitive. Handle last
//   // 24 string bytes by preloading first 8 of these 24 bytes, then use
//   // <compare_string_16_bytes_same>.
//   // And then compare last 8 bytes loaded
//   // by <compare_string_16_bytes_same>
//   // This will partially overlap with previous load and comparison, but
//   // makes the code simpler
//   str1 = str1 + (cnt2 << byte_to_char_shift);
//   str2 = str2 + (cnt2 << byte_to_char_shift);
//   tmp1 = LOAD8BYTES(str1, -8);
//   tmp2 = LOAD8BYTES(str2, -8);
//   compare_string_16_bytes_same(DIFF, DIFF2);
//   goto LAST_CHECK;
// DIFF2: // calculate character difference, when data stored in rscratch1 and cnt1
//   // move loaded chunks to tmp1 and tmp2 registers to use in DIFF block
//   tmp1 = rscratch1;
//   tmp2 = cnt1;
//   // fallthrough to DIFF
// DIFF: // calculate character difference, when data stored in tmp1 and tmp2
//   // and find different characters. rscratch2 contains zeroes at positions with
//   // same characters. Find index of first different bit (== amount of
//   // trailing zeroes), which is: <different character index> * <bits in character>
//   // + <some bits within characters>. Then, clearing bits within character
//   // (3 lowest bits for Latin1 case and 4 lowest bits for UTF-16 case)
//   // will result in the number of bits until different character in current chunks.
//
//   // As it's not possible to count trailing zeroes, reverse bits and count leading zeroes
//   rscratch2 = REVERSE_BITS(rscratch2);
//   rscratch2 = COUNT_LEADING_ZEROES(rscratch2);
//   rscratch2 = rscratch2 & (isLL ? -8 : -16); // clear lowest 3 (Latin1) or 4 (UTF-16) bits
//   tmp1 = tmp1 >> rscratch2; // shift off same characters from 1st string chunk
//   tmp2 = tmp2 >> rscratch2; // shift off same characters from 2nd string chunk
//
//   // Only first character should be left for comparison.
Use unsigned extend instruction for that 4588 // if (isLL) { 4589 // tmp1 = UNSIGNED_EXTEND_BYTE2INT(tmp1); 4590 // tmp2 = UNSIGNED_EXTEND_BYTE2INT(tmp2); 4591 // } else { 4592 // tmp1 = UNSIGNED_EXTEND_SHORT2INT(tmp1); 4593 // tmp2 = UNSIGNED_EXTEND_SHORT2INT(tmp2); 4594 // } 4595 // 4596 // result = tmp1 - tmp2; 4597 // godo DONE; 4598 // LAST_CHECK: 4599 // rscratch2 = BIT_XOR(tmp1, tmp2); 4600 // if (rscratch2 != 0) goto DIFF; 4601 // DONE: 4602 // return result; 4603 4604 address generate_compare_long_string_same_encoding(bool isLL) { 4605 const int characters_in_word = isLL ? 8 : 4; 4606 const int characters_in_dword = 2 * characters_in_word; 4607 const int byte_to_char_shift = isLL ? 0 : 1; 4608 __ align(CodeEntryAlignment); 4609 StubCodeMark mark(this, "StubRoutines", isLL 4610 ? "compare_long_string_same_encoding LL" 4611 : "compare_long_string_same_encoding UU"); 4612 address entry = __ pc(); 4613 Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4, 4614 tmp1 = r10, tmp2 = r11; 4615 Label SMALL_LOOP, LARGE_LOOP_PREFETCH, DIFF2, TAIL, DONE, DIFF, LAST_CHECK; 4616 // exit from large loop when less than 64 bytes left to read or we're about 4617 // to prefetch memory behind array border 4618 int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2); 4619 // cnt1/cnt2 contains amount of characters to compare. 
cnt1 can be re-used 4620 // update cnt2 counter with already loaded 8 bytes 4621 __ sub(cnt2, cnt2, characters_in_word); 4622 // update pointers, because of previous read 4623 __ add(str1, str1, wordSize); 4624 __ add(str2, str2, wordSize); 4625 if (SoftwarePrefetchHintDistance >= 0) { 4626 __ bind(LARGE_LOOP_PREFETCH); 4627 __ prfm(Address(str1, SoftwarePrefetchHintDistance)); 4628 __ prfm(Address(str2, SoftwarePrefetchHintDistance)); 4629 compare_string_16_bytes_same(DIFF, DIFF2); 4630 compare_string_16_bytes_same(DIFF, DIFF2); 4631 __ sub(cnt2, cnt2, 8 * characters_in_word); 4632 compare_string_16_bytes_same(DIFF, DIFF2); 4633 __ subs(rscratch2, cnt2, largeLoopExitCondition); 4634 compare_string_16_bytes_same(DIFF, DIFF2); 4635 __ br(__ GT, LARGE_LOOP_PREFETCH); 4636 __ cbz(cnt2, LAST_CHECK); // Check if no more chars left 4637 } 4638 __ subs(cnt2, cnt2, characters_in_dword); // keep number of characters reduced by 16 (LL) or 8 (UU) 4639 __ br(__ LT, TAIL); 4640 __ bind(SMALL_LOOP); 4641 compare_string_16_bytes_same(DIFF, DIFF2); 4642 __ subs(cnt2, cnt2, characters_in_dword); 4643 __ br(__ GE, SMALL_LOOP); 4644 __ cmn(cnt2, (u1)(characters_in_dword)); 4645 __ br(__ EQ, LAST_CHECK); 4646 __ bind(TAIL); 4647 __ add(str1, str1, cnt2, __ LSL, byte_to_char_shift); // points to last 16 bytes to compare 4648 __ add(str2, str2, cnt2, __ LSL, byte_to_char_shift); // points to last 16 bytes to compare 4649 __ ldr(tmp1, Address(str1, -8)); // preload 8 bytes before current pointer 4650 __ ldr(tmp2, Address(str2, -8)); // preload 8 bytes before current pointer 4651 compare_string_16_bytes_same(DIFF, DIFF2); 4652 __ b(LAST_CHECK); 4653 __ bind(DIFF2); 4654 __ mov(tmp1, rscratch1); 4655 __ mov(tmp2, cnt1); 4656 // fallthrough to DIFF 4657 __ bind(DIFF); 4658 __ rev(rscratch2, rscratch2); 4659 __ clz(rscratch2, rscratch2); 4660 __ andr(rscratch2, rscratch2, isLL ? 
-8 : -16); 4661 __ lsrv(tmp1, tmp1, rscratch2); 4662 if (isLL) { 4663 __ lsrv(tmp2, tmp2, rscratch2); 4664 __ uxtbw(tmp1, tmp1); 4665 __ uxtbw(tmp2, tmp2); 4666 } else { 4667 __ lsrv(tmp2, tmp2, rscratch2); 4668 __ uxthw(tmp1, tmp1); 4669 __ uxthw(tmp2, tmp2); 4670 } 4671 __ subw(result, tmp1, tmp2); 4672 __ b(DONE); 4673 __ bind(LAST_CHECK); 4674 __ eor(rscratch2, tmp1, tmp2); 4675 __ cbnz(rscratch2, DIFF); 4676 __ bind(DONE); 4677 __ ret(lr); 4678 return entry; 4679 } 4680 4681 void generate_compare_long_strings() { 4682 StubRoutines::aarch64::_compare_long_string_LL 4683 = generate_compare_long_string_same_encoding(true); 4684 StubRoutines::aarch64::_compare_long_string_UU 4685 = generate_compare_long_string_same_encoding(false); 4686 StubRoutines::aarch64::_compare_long_string_LU 4687 = generate_compare_long_string_different_encoding(true); 4688 StubRoutines::aarch64::_compare_long_string_UL 4689 = generate_compare_long_string_different_encoding(false); 4690 } 4691 4692 // R0 = result 4693 // R1 = str2 4694 // R2 = cnt1 4695 // R3 = str1 4696 // R4 = cnt2 |