4321 __ ret(lr); 4322 return entry; 4323 } 4324 4325 void generate_compare_long_strings() { 4326 StubRoutines::aarch64::_compare_long_string_LL 4327 = generate_compare_long_string_same_encoding(true); 4328 StubRoutines::aarch64::_compare_long_string_UU 4329 = generate_compare_long_string_same_encoding(false); 4330 StubRoutines::aarch64::_compare_long_string_LU 4331 = generate_compare_long_string_different_encoding(true); 4332 StubRoutines::aarch64::_compare_long_string_UL 4333 = generate_compare_long_string_different_encoding(false); 4334 } 4335 4336 // R0 = result 4337 // R1 = str2 4338 // R2 = cnt1 4339 // R3 = str1 4340 // R4 = cnt2 4341 // This generic linear code use few additional ideas, which makes it faster: 4342 // 1) we can safely keep at least 1st register of pattern(since length >= 8) 4343 // in order to skip initial loading(help in systems with 1 ld pipeline) 4344 // 2) we can use "fast" algorithm of finding single character to search for 4345 // first symbol with less branches(1 branch per each loaded register instead 4346 // of branch for each symbol), so, this is where constants like 4347 // 0x0101...01, 0x00010001...0001, 0x7f7f...7f, 0x7fff7fff...7fff comes from 4348 // 3) after loading and analyzing 1st register of source string, it can be 4349 // used to search for every 1st character entry, saving few loads in 4350 // comparison with "simplier-but-slower" implementation 4351 // 4) in order to avoid lots of push/pop operations, code below is heavily 4352 // re-using/re-initializing/compressing register values, which makes code 4353 // larger and a bit less readable, however, most of extra operations are 4354 // issued during loads or branches, so, penalty is minimal 4355 address generate_string_indexof_linear(bool str1_isL, bool str2_isL) { 4356 const char* stubName = str1_isL 4357 ? (str2_isL ? "indexof_linear_ll" : "indexof_linear_ul") 4358 : "indexof_linear_uu"; 4359 __ align(CodeEntryAlignment); 4360 StubCodeMark mark(this, "StubRoutines", stubName); 4361 address entry = __ pc(); 4362 4363 int str1_chr_size = str1_isL ? 1 : 2; 4364 int str2_chr_size = str2_isL ? 1 : 2; 4365 int str1_chr_shift = str1_isL ? 0 : 1; 4366 int str2_chr_shift = str2_isL ? 0 : 1; 4367 bool isL = str1_isL && str2_isL; 4368 // parameters 4369 Register result = r0, str2 = r1, cnt1 = r2, str1 = r3, cnt2 = r4; 4370 // temporary registers 4371 Register tmp1 = r20, tmp2 = r21, tmp3 = r22, tmp4 = r23; 4372 RegSet spilled_regs = RegSet::range(tmp1, tmp4); 4373 // redefinitions 4374 Register ch1 = rscratch1, ch2 = rscratch2, first = tmp3; 4375 4376 __ push(spilled_regs, sp); 4377 Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, 4378 L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, 4379 L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, 4380 L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, 4381 L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, 4382 L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; 4383 // Read whole register from str1. It is safe, because length >=8 here 4384 __ ldr(ch1, Address(str1)); 4385 // Read whole register from str2. It is safe, because length >=8 here 4386 __ ldr(ch2, Address(str2)); 4387 __ andr(first, ch1, str1_isL ? 0xFF : 0xFFFF); 4388 if (str1_isL != str2_isL) { 4389 __ eor(v0, __ T16B, v0, v0); 4390 } 4391 __ mov(tmp1, str2_isL ? 0x0101010101010101 : 0x0001000100010001); 4392 __ mul(first, first, tmp1); 4393 // check if we have less than 1 register to check 4394 __ subs(cnt2, cnt2, wordSize/str2_chr_size - 1); 4395 if (str1_isL != str2_isL) { 4396 __ fmovd(v1, ch1); 4397 } 4398 __ br(__ LE, L_SMALL); 4399 __ eor(ch2, first, ch2); 4400 if (str1_isL != str2_isL) { 4401 __ zip1(v1, __ T16B, v1, v0); 4402 } 4403 __ sub(tmp2, ch2, tmp1); 4404 __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); 4405 __ bics(tmp2, tmp2, ch2); 4406 if (str1_isL != str2_isL) { | 4321 __ ret(lr); 4322 return entry; 4323 } 4324 4325 void generate_compare_long_strings() { 4326 StubRoutines::aarch64::_compare_long_string_LL 4327 = generate_compare_long_string_same_encoding(true); 4328 StubRoutines::aarch64::_compare_long_string_UU 4329 = generate_compare_long_string_same_encoding(false); 4330 StubRoutines::aarch64::_compare_long_string_LU 4331 = generate_compare_long_string_different_encoding(true); 4332 StubRoutines::aarch64::_compare_long_string_UL 4333 = generate_compare_long_string_different_encoding(false); 4334 } 4335 4336 // R0 = result 4337 // R1 = str2 4338 // R2 = cnt1 4339 // R3 = str1 4340 // R4 = cnt2 4341 4342 // Algorithm assumes pattern and source strings to be at least 8 bytes 4343 // 4344 // General schema: 4345 // 1) Calculate maximum number of all possible matches: <amount of all possible matches>. 4346 // Denote 1st character of pattern as C0. 4347 // <amount of all possible matches> * <character size> is amount of source 4348 // bytes to scan for a match to C0. 4349 // Handle case when this number is less than 8 as a special case. 4350 // 2) Scan source string for C0. Read 8 byte blocks up to scan limit calculated above. 4351 // If no C0 match found, scan tail which is less than 8 bytes. 4352 // Proceed to full match check in case match found at any stage above. 4353 // Exit with index = -1 in case no full match found. 4354 // 3) Full match check. Iterate through all matched characters within 4355 // current 8-byte block. For each match character check for full pattern 4356 // match starting from that character. First read and compare 8 bytes of 4357 // source with starting 8 bytes of a pattern. Then proceed to per-character 4358 // match. Return match index on success. Proceed to next match within 4359 // current 8-byte block in case no full match found. Proceed to search of 4360 // C0 using 8-byte blocks (2). 4361 // NB: The case when pattern is exactly 8 bytes is considered special. Since 4362 // the code requires less loads and branches. 4363 4364 // Implementation notes: 4365 // This generic linear code use few additional ideas, which makes it faster: 4366 // 1) we can safely keep at least C0 (since length >= 8) 4367 // in order to skip initial loading(help in systems with 1 ld pipeline) 4368 // 2) we can use "fast" algorithm of finding single character to search for 4369 // first symbol with less branches(1 branch per each loaded register instead 4370 // of branch for each symbol), so, this is where constants like 4371 // 0x0101...01, 0x00010001...0001, 0x7f7f...7f, 0x7fff7fff...7fff comes from 4372 // 3) after loading and analyzing 1st register of source string, it can be 4373 // used to search for every C0 entry, saving few loads in 4374 // comparison with "simpler-but-slower" implementation 4375 // 4) in order to avoid lots of push/pop operations, code below is heavily 4376 // re-using/re-initializing/compressing register values, which makes code 4377 // larger and a bit less readable, however, most of extra operations are 4378 // issued during loads or branches, so, penalty is minimal 4379 4380 4381 // Detailed algorithm: 4382 4383 // // Algorithm parameters: different code is generated depending on these values. 4384 // bool str1_isL; // true if pattern string is Latin1 4385 // bool str2_isL; // true if source string is Latin1 4386 // 4387 // // Constants derived via algorithm parameters: 4388 // str1_chr_size // pattern string character size in bytes 4389 // str2_chr_size // source string character size in bytes 4390 // str1_chr_shift // shift used to convert between byte and char sizes for pattern 4391 // str2_chr_shift // shift used to convert between byte and char sizes for source 4392 4393 // // Output: 4394 // int result; // return pattern index. -1 if not found 4395 // // Input: 4396 // int result; // set to 0 by caller 4397 // byte* str1; // pattern string 4398 // byte* str2; // source string 4399 // long cnt1; // pattern string length. Holds positive 32bit value. Also counter of characters-yet-to-be-checked 4400 // long cnt2; // source string length. Holds positive 32bit value. 4401 // 4402 // // Temporary variables: 4403 // long tmp1; // mask value. lowest bit set at each character place 4404 // long tmp2; 4405 // long tmp3; // also referred as "first". Сloned first pattern symbol 4406 // long tmp4; 4407 // long ch1; // first 8 bytes of pattern 4408 // long ch2; // mostly used to hold parts of source string to check 4409 // long2 v0, v1; // long[2] vectors used to convert encodings 4410 // 4411 // // // Search for first pattern character inside source string by using 4412 // // // modified block search algorithm used for finding 1-character-pattern 4413 // // // (see MacroAssembler::string_indexof at label DO1). 4414 // // // It checks whole register for single character of Latin1 or 4415 // // // UTF-16 encoding. 4416 // // // 4417 // // // Assume we are searching C0 inside of 8-byte "data". 1 or 2 bytes of 4418 // // // data related to processed character of source string is called 4419 // // // 'value' below. 4420 // // // 4421 // // // Algorithm steps: 4422 // // // 4423 // // // step 1: replicated = <replicate C0 to size of data (i.e. register)>. 4424 // // // 4425 // // // step 2: xored = replicated ^ data; // has zeroes at C0 match positions 4426 // // // 4427 // // // step 3: maskDelta = xored - (Latin1 ? 0x0101010101010101 : 0x0001000100010001); 4428 // // // (values in maskDelta will have highest bit set in values, 4429 // // // where same position xored values were equal to: 4430 // // // [0x81, 0xFF], 0x00, 0x01 for Latin1 and 4431 // // // [0x8001, 0xFFFF], 0x0000, 0x0001 for UTF-16) 4432 // // // 4433 // // // step 4: maskedValue = xored | (Latin1 ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); 4434 // // // (values in maskedValue will have highest bit set to zero for the 4435 // // // following values in xored: [0x00, 0x7F] for Latin1, [0x0000, 0x7FFF] for UTF-16) 4436 // // // 4437 // // // step 5: matchInfo = maskDelta & ~maskedValue; 4438 // // // ("& ~maskedValue" clears all bits in each value except highest bit 4439 // // // at indexes, where xored value was in [0x00, 0x7F] (Latin1) or [0x00, 0x7FFF] (UTF-16). 4440 // // // maskDelta has highest bit set for values, where xored values 4441 // // // was [0x81, 0xFF], 0x00, 0x01 (Latin1) or [0x8001, 0xFFFF], 0x0000, 0x0001 (UTF-16). 4442 // // // Then the only non-zero values in matchInfo are those, where xored 4443 // // // values were 0x00 or 0x01. OxOO is pattern match. 0x01 means this 4444 // // // position has character with difference in lowest bit. 4445 // // // We will recheck first character match later as part of next checks. 4446 // // // Positions where xored values were 0x00 or 0x01 has highest bit set in 4447 // // // matchInfo (0x80 Latin1 and 0x8000 UTF-16). 4448 // // // 4449 // // // step 6: if (matchInfo) == 0 then <no match found inside data>; 4450 // // // else <check further match>; 4451 // // // 4452 // // // The algorithm skips almost all characters (except one), that 4453 // // // are not C0. 1 branch and 1 load is issued per 8 bytes when 4454 // // // no match was found. Referred as ALGO1 4455 // 4456 // 4457 // // IndexOf block linear scan: 4458 // 4459 // PUSH_ON_STACK(tmp1, tmp2, tmp3, tmp4); // store registers to use as temporary 4460 // // now initial data can be loaded before further initialization. It is safe 4461 // // because algorithm is used for >=8 characters strings 4462 // ch1 = LOAD8(str1); // first 8 bytes of pattern string 4463 // ch2 = LOAD8(str2); // first 8 bytes of source string 4464 // // linear search of cnt1 size pattern in cnt2 string required check of 4465 // // (cnt2 - cnt1 + 1) = <amount of all possible matches> 4466 // cnt2 = cnt2 - cnt1; // partially calculated <amount of all possible matches> 4467 // first = ch1 & (str1_isL ? 0xFF : 0xFFFF); // pattern first character 4468 // if (str1_isL != str2_isL) v0 = v0 ^ v0; // zeroing data 4469 // tmp1 = str2_isL ? 0x0101010101010101 : 0x0001000100010001; // mask 4470 // first = first * tmp1; // now first contains 8 bytes of duplicated first pattern symbols 4471 // cnt2 = cnt2 - (wordSize/str2_chr_size - 1); // <amount of all possible matches> - wordSize/str2_chr_size; 4472 // bool smallAmountOfCombinations = cnt2 <= 0; // kept in flags 4473 // // <amount of combinations> - wordSize/str2_chr_size is positive in case 4474 // // first symbols of all possible matches doesn't fit in wordSize bytes (i.e. 1 register). 4475 // if (str1_isL != str2_isL) v1[0] = ch1; 4476 // if (smallAmountOfCombinations) goto L_SMALL;// check less than 8 bytes among loaded 8 4477 // ch2 = first ^ ch2; // see ALGO1 4478 // // CONVERT_LATIN1_TO_UTF16 below is implemented by zip1 instruction 4479 // if (str1_isL != str2_isL) v1 = CONVERT_LATIN1_TO_UTF16(v1, v0); 4480 // tmp2 = ch2 - tmp1; // see ALGO1 4481 // ch2 = ch2 | str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff; // see ALGO1 4482 // tmp2 = tmp2 & !ch2; // see ALGO1 4483 // bool noMatch = (tmp2 == 0); // kept in flags 4484 // if (str1_isL != str2_isL) ch1 = v1[0]; // first 8 bytes of converted pattern string 4485 // if (!noMatch) goto L_HAS_ZERO; // goto match analyzing 4486 // cnt2 = cnt2 - wordSize/str2_chr_size; // reduce counter by amount of checked characters 4487 // bool smallAmountOfCombinations2 = cnt2 < 0; // kept in flags 4488 // result += wordSize/str2_chr_size; // update result index according to checked characters amount 4489 // str2 += wordSize; // update source string pointer to point at unchecked characters 4490 // if (smallAmountOfCombinations2) goto L_POST_LOOP; // load and check less than 8 bytes 4491 // L_LOOP: // main loop. Scan for C0 in a blocks of 8 bytes 4492 // ch2 = LOAD8(str2); // load next 8 source bytes to check 4493 // ch2 = first & ch2; // see ALGO1 4494 // tmp2 = ch2 - tmp1; // see ALGO1 4495 // ch2 = ch2 | str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff; // see ALGO1 4496 // tmp2 = tmp2 & !ch2; // see ALGO1 4497 // bool noMatch2 = (tmp2 == 0); // kept in flags 4498 // if (!noMatch2) goto L_HAS_ZERO; // goto match analyzing 4499 // L_LOOP_PROCEED: // no full match found. Continue 4500 // cnt2 = cnt2 - wordSize/str2_chr_size; // reduce counter by amount of checked characters 4501 // bool largeAmountOfCombinations = cnt2 >= 0; // kept in flags 4502 // str2 += wordSize; // update source string pointer to point at unchecked characters 4503 // result += wordSize/str2_chr_size; // update result index according to checked characters amount 4504 // if (largeAmountOfCombinations) goto L_LOOP; 4505 // L_POST_LOOP: // less than 8 bytes lest to load and check for first pattern symbol 4506 // if (cnt2 <= -wordSize/str2_chr_size) goto NOMATCH; // no data left to check 4507 // ch2 = LOAD8(str2); // load last octet (more than need to be checked, but still within source string) 4508 // // NB: cnt2 is in [-7, -1] (Latin1) and [-3, -1] (UTF16) = amount of 4509 // // characters left to check reduced by wordSize/str2_chr_size, which is 4510 // // -(amount-of-characters-not-to-check). Left shift by 4511 // // (LogBitePerBytye + str2_chr_shift) below will turn it to amount of bits 4512 // // to ignore at the end of match info. Can reuse cnt2 to keep this info, 4513 // // because old cnt2 value is not needed anymore 4514 // cnt2 = 0 - cnt2 << 3 + str2_chr_shift; // amount of bits to ignore 4515 // ch2 = first ^ ch2; // see ALGO1 4516 // tmp2 = ch2 - tmp1; // see ALGO1 4517 // ch2 = ch2 | str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff; // see ALGO1 4518 // tmp4 = -1; // mask. all bits set 4519 // goto L_SMALL_PROCEED; // goto common part of handling tails 4520 // L_SMALL: // checks remaining amount of characters in already loaded data (jump here in case small amount of data should be scanned for 1st symbol) 4521 // // L_SMALL block for different encodings has code to convert Latin1 to UTF16 4522 // // because jump to this block is done before conversion is finished 4523 // cnt2 = 0 - cnt2 << 3 + str2_chr_shift; // amount of bits to ignore 4524 // ch2 = first ^ ch2; // see ALGO1 4525 // if (str1_isL != str2_isL) v1 = CONVERT_LATIN1_TO_UTF16(v1, v0); 4526 // tmp2 = ch2 - tmp1; // see ALGO1 4527 // ch2 = ch2 | str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff; // see ALGO1 4528 // if (str1_isL != str2_isL) ch1 = v1[0]; 4529 // L_SMALL_PROCEED: // common code for handling tail of L_LOOP and L_SMALL: scanning less than 8 bytes 4530 // tmp4 = tmp4 >> cnt2; // mask with useless bits set to 0 4531 // tmp2 = tmp2 & !ch2; // see ALGO1 4532 // tmp2 = tmp2 & tmp4; // clear match info in useless bits 4533 // bool noMatch3 = (tmp2 == 0); // kept in flags 4534 // tmp2 = REVERSE_BITS(tmp2); // need backward bits order for COUNT_LEADING_ZEROES instruction 4535 // // NB: after reversing bits, higher bytes represent earlier characters 4536 // // match info. And instead of 0x80(Latin1) and 0x8000(UTF16) entries for 4537 // // matching symbols now it is 0x1 in any matching character place 4538 // if (noMatch3) goto NOMATCH; 4539 // L_SMALL_HAS_ZERO_LOOP: // Case of less than 8 bytes were scanned for C0: possible match with C0 found. Check if whole pattern match. 4540 // // NB: COUT_LEADING_ZEROES below return amount of bits until first 4541 // // match symbol + <bits in character until lowest bit> (because lowest 4542 // // bit set in matched symbol place). Then, resulting value is: 4543 // // match_index * BitsPerCharacter + (BitsPerCharactere-1). Then, shifting 4544 // // this value right by BitsPerCharacter will result to matching index 4545 // // within currently considered data chunk 4546 // tmp4 = COUNT_LEADING_ZEROES(tmp2); 4547 // bool patternIsShort = (cnt1 <= wordSize/str2_chr_size); // kept in flags 4548 // if (patternIsShort) goto L_SMALL_CMP_LOOP_LAST_CMP2; 4549 // if (str2_isL) { 4550 // str2 = str2 + tmp4 >> LogBitsPerByte; // source address at matched index 4551 // ch2 = LOAD8(str2); // read first 8 bytes of source string starting from matched symbol. We need to recheck 1st pattern symbol, because it still can be non-matching symbol 4552 // tmp2 = tmp2 << tmp4; // shift off all leading zeroes in match info 4553 // result = result + tmp4 >> LogBitsPerBytes;// update result to reflect currently checked match index 4554 // tmp2 = tmp2 << 1; // shift off matching bit 4555 // } else { 4556 // // NB: calculation of byte offset in source string for UTF16 case: 4557 // // tmp4 is <amount of bits until first matched symbol> + 15. 4558 // // Amount of bytes until bit with match information is tmp4 >> LogBitsPerByte. 4559 // // This amount of bytes is always odd for UTF-16 case and last counted 4560 // // byte is part of matching symbol. Need to clear lowest bit then to 4561 // // get amount of bytes until matched character. Then 4562 // // (tmp4 >> LogBitsPerByte) && 0xE clears lowest bit. 4563 // ch2 = 0xE; // all bits in byte except lowest one are set 4564 // ch2 = ch2 & tmp4 >> LogBitsPerByte; // byte offset of matched character 4565 // ch2 = LOAD8(str2, ch2); // load 8 bytes from str2 with ch2 offset. Recheck 1st pattern symbol, because it still can be non-matching symbol 4566 // tmp2 = tmp2 << tmp4; // shift off all leading zeroes in match info 4567 // result = result + tmp4 >> LogBitsPerBytes + str2_chr_shift; // update result to reflect currently checked match index 4568 // str2 = str2 + tmp4 >> LogBitsPerByte + str2_chr_shift; // update str2 by adding matched char index (will need to add once more to have byte offset added) 4569 // tmp2 = tmp2 << 1; // shift off matching bit 4570 // str2 = str2 + tmp4 >> LogBitsPerByte + str2_chr_shift; // now str2 points to matched character address 4571 // } 4572 // bool first8BytesNotMatch = (ch1 != ch2); // kept in flags 4573 // tmp4 = wordSize/str_chr_size; // index of next character to check match 4574 // if (first8BytesNotMatch) goto L_SMALL_CMP_LOOP_NOMATCH; 4575 // L_SMALL_CMP_LOOP: // Case of less than 8 bytes were scanned for C0: first 8 bytes of pattern already matched. Proceed to per-character match until the end of pattern 4576 // // NB: No need in "first" values anymore because it's a small case and 4577 // // we already checked all first symbol matches. Reuse it. 4578 // first = (str1_isL) ? LOAD_LATIN1_CHAR(str1, tmp4) : LOAD_UTF16_CHAR(str1, tmp4 << str1_chr_shift); // next pattern symbol to check 4579 // ch2 = (str2_isL) ? LOAD_LATIN1_CHAR(str2, tmp4) : LOAD_UTF16_CHAR(str2, tmp4 << str2_chr_shift); // next source symbol to check 4580 // tmp4 = tmp4 + 1; // update index to check next 4581 // bool isLastSymbol = (tmp4 >= cnt1); // kept in flags 4582 // if (isLastSymbol) goto L_SMALL_CMP_LOOP_LAST_CMP; 4583 // bool currentSymbolMatch = (first == ch2); // kept in flags 4584 // if (currentSymbolMatch) goto L_SMALL_CMP_LOOP; 4585 // L_SMALL_CMP_LOOP_NOMATCH: // Case of less than 8 bytes were scanned for C0: current full match attempt failed. 4586 // if (tmp2 == 0) NOMATCH; // no more first symbol matches found 4587 // tmp4 = COUNT_LEADING_ZEROES(tmp2); // find index of matching bit 4588 // result = result + 1; // increase result by 1, because current match fail. Now possible result starts from next symbol 4589 // str2 = str2 + str_chr_size; // advance source pointer, because current match fail. Now possible match checks starts from next symbol 4590 // goto L_SMALL_HAS_ZERO_LOOP; 4591 // L_SMALL_CMP_LOOP_LAST_CMP: // Case of less than 8 bytes were scanned for C0: 1 character left to be checked until full match 4592 // bool lastCharacterNotMatch = (first != ch2);// kept in flags 4593 // if (lastCharacterNotMatch) goto L_SMALL_CMP_LOOP_NOMATCH; 4594 // goto DONE; 4595 // L_SMALL_CMP_LOOP_LAST_CMP2: // Case when string size is 8 bytes. No need in internal per-character loop 4596 // if (str2_isL) { 4597 // str2 = str2 + tmp4 >> LogBitsPerByte; // source address at matched index 4598 // ch2 = LOAD8(str2); // read first 8 bytes of source string starting from matched symbol. Recheck 1st pattern symbol, because it still can be non-matching symbol 4599 // tmp2 = tmp2 << tmp4; // shift off all leading zeroes in match info 4600 // result = result + tmp4 >> LogBitsPerBytes;// update result to reflect currently checked match index 4601 // tmp2 = tmp2 << 1; // shift off matching bit 4602 // } else { 4603 // ch2 = 0xE; // all bits in byte except lowest one are set 4604 // ch2 = ch2 & tmp4 >> LogBitsPerByte; // byte offset of matched character 4605 // ch2 = LOAD8(str2, ch2); // load 8 bytes from str2 with ch2 offset. Recheck 1st pattern symbol, because it still can be non-matching symbol 4606 // tmp2 = tmp2 << tmp4; // shift off all leading zeroes in match info 4607 // result = result + tmp4 >> LogBitsPerBytes + str2_chr_shift; // update result to reflect currently checked match index 4608 // str2 = str2 + tmp4 >> LogBitsPerByte + str2_chr_shift; // update str2 by adding matched char index (will need to add once more to have byte offset added) 4609 // tmp2 = tmp2 << 1; // shift off matching bit 4610 // str2 = str2 + tmp4 >> LogBitsPerByte + str2_chr_shift; // now str2 points to matched character address 4611 // } 4612 // bool notMatching8Bytes = (ch1 != ch2); // kept in flags 4613 // if (notMatching8Bytes) goto L_SMALL_CMP_LOOP_NOMATCH; 4614 // goto DONE; 4615 // L_HAS_ZERO: // main L_LOOP found C0 match candidate(s) in current 8-byte block 4616 // tmp2 = REVERSE_BITS(tmp2); 4617 // tmp4 = COUNT_LEADING_ZEROES(tmp2); 4618 // // Perform compression of counters(cnt1 and cnt2). Both are 64-bit 4619 // // registers holding 32-bit values. Then it can be compressed into one 4620 // // 64-bit register. Store low 32 bits of cnt1 into high 32 bits of cnt2. 4621 // // This way cnt1 register can be reused and restored after. old cnt1 4622 // // value still can be accessed as cnt2 >> 32. 4623 // cnt2 = cnt2 | cnt2 << 32; 4624 // // NB: loop (L_HAS_ZERO_LOOP) below iterates through first symbol match 4625 // // info in tmp2 until full pattern match is met. Result value between 4626 // // iterations should be increased by <match index> + 1. That's why 4627 // // pre-condition for 1st iteration is to reduce result by 1. This will 4628 // // make loop simpler. 4629 // result = result - 1; 4630 // L_HAS_ZERO_LOOP: // Loop over all C0 match candidates found in current 8-byte block 4631 // // NB: code is this L_HAS_ZERO_LOOP code path is mostly same as in 4632 // // L_SMALL_HAS_ZERO_LOOP, but we need more registers to keep intermediate 4633 // // data for this long case(like, keeping cloned first symbol in "first"). 4634 // // Because of that this code has slightly different registers usage with 4635 // // data compression described above and respective data usage. 4636 // cnt1 = wordSize/str2_chr_size; 4637 // bool only8BytesToCompare = (cnt1 >= cnt2 >> 32); // kept in flag 4638 // if (only8BytesToCompare) goto L_CMP_LOOP_LAST_CMP2; 4639 // // if-else blocks below has a number of same instructions, but due to 4640 // // slightly different logic it is pipelined differently. 4641 // if (str2_isL) { 4642 // ch2 = tmp4 >> (LogBitsPerByte + str2_chr_shift); // char index 4643 // ch2 = LOAD8(str2, ch2); // Need to recheck 1st pattern symbol, because it still can be non-matching symbol 4644 // tmp2 = tmp2 << tmp4; // shift off leading zeroes 4645 // str2 = str2 + tmp4 >> (LogBitsPerBytes + str2_chr_shift); // increase str2 by <match index> to have start address of currently checked match combination 4646 // tmp4 = tmp4 + 1; // tmp4 was 8*<match index> + 7. Now it 8 * (<match index> + 1) 4647 // result = result + tmp4 >> (LogBitsPerByte + str2_chr_shift); // result += (<match index> + 1) 4648 // tmp = tmp << 1; // shift of matching bit 4649 // tmp4 = wordSize/str2_chr_size; // index of next character to check 4650 // } else { 4651 // ch2 = 0xE; 4652 // ch2 = ch2 & tmp4 >> LogBitsPerByte; 4653 // ch2 = LOAD8(str2, ch2); // Need to recheck 1st pattern symbol, because it still can be non-matching symbol 4654 // tmp2 = tmp2 << tmp4; 4655 // // NB: tmp4 >> LogBitsPerByte will return <match index> * str2_chr_size + 1, 4656 // // which is offset inside matched 2-byte character. Calculate offset of 4657 // // next character and then step back by 1 character. Since tmp4 is 4658 // // 16*<match index> + 15, we can add 1 for tmp4 to become 16 * (<match index> + 1) 4659 // // which is bit offset of next character. This value is also used to updated 4660 // // "result" by (<match index> + 1). 4661 // tmp4 = tmp4 + 1; // tmp4 was 16*<match index> + 15. Now it is 16 * (<match index> + 1) 4662 // result = result + tmp4 >> (LogBitsPerByte + str2_chr_shift); 4663 // str2 = str2 + tmp4 >> LogBitsPerByte; // advance str2 pointer by (<match index> + 1) characters. Will return by 1 character back later to have start address of currently checked match combination 4664 // tmp2 = tmp2 << 1 // shift off matching bit 4665 // tmp4 = wordSize/str2_chr_size; // index of next character to check 4666 // str2 = str2 - str2_chr_size; // return by 1 character back as promised above 4667 // } 4668 // bool notMatching8Bytes2 = (ch1 != ch2); // kept in flags 4669 // tmp4 = wordSize/str2_chr_size; // index of next character to check 4670 // if (notMatching8Bytes2) goto L_CMP_LOOP_NOMATCH; 4671 // L_CMP_LOOP: // Per-character loop to check full pattern match of current match candidate 4672 // cnt1 = (str1_isL) ? LOAD_LATIN1_CHAR(str1, tmp4) : LOAD_UTF16_CHAR(str1, tmp4 << str1_chr_shift); // next pattern symbol to check 4673 // ch2 = (str2_isL) ? LOAD_LATIN1_CHAR(str2, tmp4) : LOAD_UTF16_CHAR(str2, tmp4 << str2_chr_shift); // next source symbol to check 4674 // tmp4 = tmp4 + 1; // update index 4675 // bool isLastSymbol2 = (tmp4 >= cnt2 >> 32); // kept in flags 4676 // if (isLastSymbol2) goto L_CMP_LOOP_LAST_CMP; 4677 // bool currentSymbolMatch2 = (cnt1 == ch2); // kept in flags 4678 // if (currentSymbolMatch2) goto L_CMP_LOOP; 4679 // L_CMP_LOOP_NOMATCH: // current match candidate check failed 4680 // if (tmp2 == 0) goto L_HAS_ZERO_LOOP_NOMATCH;// no more match found in current byte octet 4681 // tmp4 = COUNT_LEADING_ZERO(tmp2); // find next match bit index 4682 // str2 = str2 + str2_chr_size; // next match attempt begins from next start character. update str2 pointer 4683 // goto L_HAS_ZERO_LOOP; 4684 // L_CMP_LOOP_LAST_CMP: // checking last character until full pattern match 4685 // bool lastCharacterNotMatch2 = (cnt1 != ch2);// kept in flags 4686 // if (lastCharacterNotMatch2) goto L_CMP_LOOP_NOMATCH; 4687 // goto DONE; 4688 // L_CMP_LOOP_LAST_CMP2: // L_LOOP found match and pattern string is 8 bytes. First compare of 8 bytes is the last one for current match candidate. 4689 // if (str2_isL) { 4690 // ch2 = tmp4 >> (LogBitsPerByte + str2_chr_shift); // match char index 4691 // ch2 = LOAD8(str2, ch2); // read whole match candidate. Recheck 1st pattern symbol, because it still can be non-matching symbol 4692 // tmp2 = tmp2 << tmp4; // shift off leading zeroes 4693 // str2 = str2 + tmp4 >> (LogBitsPerByte + str2_chr_shift); // str2 now points to matched 1st character 4694 // tmp4 = tmp4 + 1; // <match bit index + 1> 4695 // result = result + tmp4 >> (LogBitsPerByte + str2_chr_shift); 4696 // tmp2 = tmp2 << 1; // shift off matching bit 4697 // } else { 4698 // ch2 = 0xE; 4699 // ch2 = ch2 & tmp4 >> LogBitsPerByte; // byte offset 4700 // ch2 = LOAD8(str2, ch2); // Recheck 1st pattern symbol, because it still can be non-matching symbol 4701 // tmp2 = tmp2 << tmp4; // shift off leading zeroes 4702 // // NB: tmp4 >> LogBitsPerByte will return <match index> * str2_chr_size + 1, 4703 // // which is offset inside matched 2-byte character. Calculate offset of 4704 // // next character and then step back by 1 character. Since tmp4 is 4705 // // 16*<match index> + 15, we can add 1 for tmp4 to become 16 * (<match index> + 1) 4706 // // which is bit offset of next character. This value is also used to updated 4707 // // "result" by (<match index> + 1). 4708 // tmp4 = tmp4 + 1; // <match bit index + 1> 4709 // result = result + tmp4 >> (LogBitsPerByte + str2_chr_shift); 4710 // str2 = str2 + tmp4 >> LogBitsPerByte; // now points to character after 1st matched one 4711 // tmp2 = tmp2 << 1; // shift off matching bit 4712 // str2 = str2 - str2_chr_size; // points to matched 1st character 4713 // } 4714 // bool notMatching8Bytes3 = (ch1 != ch2); 4715 // if (notMatching8Bytes3) goto L_CMP_LOOP_NOMATCH; 4716 // goto DONE; 4717 // L_HAS_ZERO_LOOP_NOMATCH: // Current match candidate check for full match failed. Restore state for further work. 4718 // // Current octet has no more matches candidates. Restore following data: 4719 // // 1) result index. Index was wordSize/str2_chr_size * N until 4720 // // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, then 4721 // // result was increased at max by wordSize/str2_chr_size - 1. Reset these 4722 // // bits to original state then. Clear 2 lower bits for UTF16 case and 4723 // // lower 3 bits for Latin1 case. 4724 // // 2) restore cnt1 and cnt2 values from "compressed" cnt2 4725 // // 3) advance str2 to represent next str2 octet. result & 7 (Latin1) 4726 // // and result & 3 (UTF-16) is index of last analyzed substring inside 4727 // // current octet. str2 points to this character. Need to update it to 4728 // // point at next octet. Increase by octet is done in a loop we're 4729 // // returning to (L_LOOP). Then we need to reset str2 here to current octet 4730 // // address. Resetting bits in a same manner as result value won't work 4731 // // because address can be unaligned. Then we have to calculate exact value 4732 // // to subtract from str2. It's <last match index inside octet> << str2_chr_shift 4733 // tmp2 = result & (wordSize/str2_chr_size - 1); // last analyzed character index inside octet 4734 // cnt2 = cnt2 >> 32; // uncompress cnt1 4735 // // NB: ZERO_LOW_BITS(param1, param2) below is zeroing lowest bits of 4736 // // param1 from bit number 0 to bit number param2 inclusive. Implemented 4737 // // as bfm instruction 4738 // ZERO_LOW_BITS(result, 2 - str2_chr_shift); 4739 // str2 = str2 - tmp2 << str2_chr_shift; 4740 // cnt2 = cnt2 & 0xFFFFFFFF; // uncompress cnt2 4741 // goto L_LOOP_PROCEED; 4742 // NOMATCH: 4743 // result = -1; 4744 // DONE: 4745 // POP_FROM_STACK(tmp1, tmp2, tmp3, tmp4); // restore temporary register values 4746 // return; 4747 address generate_string_indexof_linear(bool str1_isL, bool str2_isL) { 4748 const char* stubName = str1_isL 4749 ? (str2_isL ? "indexof_linear_ll" : "indexof_linear_ul") 4750 : "indexof_linear_uu"; 4751 __ align(CodeEntryAlignment); 4752 StubCodeMark mark(this, "StubRoutines", stubName); 4753 address entry = __ pc(); 4754 4755 int str1_chr_size = str1_isL ? 1 : 2; 4756 int str2_chr_size = str2_isL ? 1 : 2; 4757 int str1_chr_shift = str1_isL ? 0 : 1; 4758 int str2_chr_shift = str2_isL ? 0 : 1; 4759 bool isL = str1_isL && str2_isL; 4760 // parameters 4761 Register result = r0, str2 = r1, cnt1 = r2, str1 = r3, cnt2 = r4; 4762 // temporary registers 4763 Register tmp1 = r20, tmp2 = r21, tmp3 = r22, tmp4 = r23; 4764 RegSet spilled_regs = RegSet::range(tmp1, tmp4); 4765 // redefinitions 4766 Register ch1 = rscratch1, ch2 = rscratch2, first = tmp3; 4767 4768 __ push(spilled_regs, sp); 4769 Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, 4770 L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, 4771 L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, 4772 L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, 4773 L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, 4774 L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; 4775 // Read whole register from str1. It is safe, because length >=8 here 4776 __ ldr(ch1, Address(str1)); 4777 // Read whole register from str2. It is safe, because length >=8 here 4778 __ ldr(ch2, Address(str2)); 4779 __ sub(cnt2, cnt2, cnt1); 4780 __ andr(first, ch1, str1_isL ? 0xFF : 0xFFFF); 4781 if (str1_isL != str2_isL) { 4782 __ eor(v0, __ T16B, v0, v0); 4783 } 4784 __ mov(tmp1, str2_isL ? 0x0101010101010101 : 0x0001000100010001); 4785 __ mul(first, first, tmp1); 4786 // check if we have less than 1 register to check 4787 __ subs(cnt2, cnt2, wordSize/str2_chr_size - 1); 4788 if (str1_isL != str2_isL) { 4789 __ fmovd(v1, ch1); 4790 } 4791 __ br(__ LE, L_SMALL); 4792 __ eor(ch2, first, ch2); 4793 if (str1_isL != str2_isL) { 4794 __ zip1(v1, __ T16B, v1, v0); 4795 } 4796 __ sub(tmp2, ch2, tmp1); 4797 __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); 4798 __ bics(tmp2, tmp2, ch2); 4799 if (str1_isL != str2_isL) { |