< prev index next >

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Print this page




4321       __ ret(lr);
4322     return entry;
4323   }
4324 
4325   void generate_compare_long_strings() {
4326       StubRoutines::aarch64::_compare_long_string_LL
4327           = generate_compare_long_string_same_encoding(true);
4328       StubRoutines::aarch64::_compare_long_string_UU
4329           = generate_compare_long_string_same_encoding(false);
4330       StubRoutines::aarch64::_compare_long_string_LU
4331           = generate_compare_long_string_different_encoding(true);
4332       StubRoutines::aarch64::_compare_long_string_UL
4333           = generate_compare_long_string_different_encoding(false);
4334   }
4335 
4336   // R0 = result
4337   // R1 = str2
4338   // R2 = cnt1
4339   // R3 = str1
4340   // R4 = cnt2
























4341   // This generic linear code use few additional ideas, which makes it faster:
4342   // 1) we can safely keep at least 1st register of pattern(since length >= 8)
4343   // in order to skip initial loading(help in systems with 1 ld pipeline)
4344   // 2) we can use "fast" algorithm of finding single character to search for
4345   // first symbol with less branches(1 branch per each loaded register instead
4346   // of branch for each symbol), so, this is where constants like
4347   // 0x0101...01, 0x00010001...0001, 0x7f7f...7f, 0x7fff7fff...7fff comes from
4348   // 3) after loading and analyzing 1st register of source string, it can be
4349   // used to search for every 1st character entry, saving few loads in
4350   // comparison with "simplier-but-slower" implementation
4351   // 4) in order to avoid lots of push/pop operations, code below is heavily
4352   // re-using/re-initializing/compressing register values, which makes code
4353   // larger and a bit less readable, however, most of extra operations are
4354   // issued during loads or branches, so, penalty is minimal
















































































































































































































































































































































































4355   address generate_string_indexof_linear(bool str1_isL, bool str2_isL) {
4356     const char* stubName = str1_isL
4357         ? (str2_isL ? "indexof_linear_ll" : "indexof_linear_ul")
4358         : "indexof_linear_uu";
4359     __ align(CodeEntryAlignment);
4360     StubCodeMark mark(this, "StubRoutines", stubName);
4361     address entry = __ pc();
4362 
4363     int str1_chr_size = str1_isL ? 1 : 2;
4364     int str2_chr_size = str2_isL ? 1 : 2;
4365     int str1_chr_shift = str1_isL ? 0 : 1;
4366     int str2_chr_shift = str2_isL ? 0 : 1;
4367     bool isL = str1_isL && str2_isL;
4368    // parameters
4369     Register result = r0, str2 = r1, cnt1 = r2, str1 = r3, cnt2 = r4;
4370     // temporary registers
4371     Register tmp1 = r20, tmp2 = r21, tmp3 = r22, tmp4 = r23;
4372     RegSet spilled_regs = RegSet::range(tmp1, tmp4);
4373     // redefinitions
4374     Register ch1 = rscratch1, ch2 = rscratch2, first = tmp3;
4375 
4376     __ push(spilled_regs, sp);
4377     Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO,
4378         L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
4379         L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
4380         L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
4381         L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
4382         L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
4383     // Read whole register from str1. It is safe, because length >=8 here
4384     __ ldr(ch1, Address(str1));
4385     // Read whole register from str2. It is safe, because length >=8 here
4386     __ ldr(ch2, Address(str2));

4387     __ andr(first, ch1, str1_isL ? 0xFF : 0xFFFF);
4388     if (str1_isL != str2_isL) {
4389       __ eor(v0, __ T16B, v0, v0);
4390     }
4391     __ mov(tmp1, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
4392     __ mul(first, first, tmp1);
4393     // check if we have less than 1 register to check
4394     __ subs(cnt2, cnt2, wordSize/str2_chr_size - 1);
4395     if (str1_isL != str2_isL) {
4396       __ fmovd(v1, ch1);
4397     }
4398     __ br(__ LE, L_SMALL);
4399     __ eor(ch2, first, ch2);
4400     if (str1_isL != str2_isL) {
4401       __ zip1(v1, __ T16B, v1, v0);
4402     }
4403     __ sub(tmp2, ch2, tmp1);
4404     __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4405     __ bics(tmp2, tmp2, ch2);
4406     if (str1_isL != str2_isL) {




4321       __ ret(lr);
4322     return entry;
4323   }
4324 
4325   void generate_compare_long_strings() {
4326       StubRoutines::aarch64::_compare_long_string_LL
4327           = generate_compare_long_string_same_encoding(true);
4328       StubRoutines::aarch64::_compare_long_string_UU
4329           = generate_compare_long_string_same_encoding(false);
4330       StubRoutines::aarch64::_compare_long_string_LU
4331           = generate_compare_long_string_different_encoding(true);
4332       StubRoutines::aarch64::_compare_long_string_UL
4333           = generate_compare_long_string_different_encoding(false);
4334   }
4335 
4336   // R0 = result
4337   // R1 = str2
4338   // R2 = cnt1
4339   // R3 = str1
4340   // R4 = cnt2
4341 
4342   // Algorithm assumes pattern and source strings to be at least 8 bytes
4343   //
4344   // General schema:
4345   // 1) Calculate maximum number of all possible matches: <amount of all possible matches>.
4346   //    Denote 1st character of pattern as C0.
4347   //    <amount of all possible matches> * <character size> is amount of source
4348   //    bytes to scan for a match to C0.
4349   //    Handle case when this number is less than 8 as a special case.
4350   // 2) Scan source string for C0. Read 8 byte blocks up to scan limit calculated above.
4351   //    If no C0 match found, scan tail which is less than 8 bytes.
4352   //    Proceed to full match check in case match found at any stage above.
4353   //    Exit with index = -1 in case no full match found.
4354   // 3) Full match check. Iterate through all matched characters within
4355   //    current 8-byte block. For each match character check for full pattern
4356   //    match starting from that character. First read and compare 8 bytes of
4357   //    source with starting 8 bytes of a pattern. Then proceed to per-character
4358   //    match. Return match index on success. Proceed to next match within
4359   //    current 8-byte block in case no full match found. Proceed to search of
4360   //    C0 using 8-byte blocks (2).
4361   // NB: The case when pattern is exactly 8 bytes is considered special. Since
4362   //    the code requires less loads and branches.
4363 
4364   // Implementation notes:
4365   // This generic linear code use few additional ideas, which makes it faster:
4366   // 1) we can safely keep at least C0 (since length >= 8)
4367   // in order to skip initial loading(help in systems with 1 ld pipeline)
4368   // 2) we can use "fast" algorithm of finding single character to search for
4369   // first symbol with less branches(1 branch per each loaded register instead
4370   // of branch for each symbol), so, this is where constants like
4371   // 0x0101...01, 0x00010001...0001, 0x7f7f...7f, 0x7fff7fff...7fff comes from
4372   // 3) after loading and analyzing 1st register of source string, it can be
4373   // used to search for every C0 entry, saving few loads in
4374   // comparison with "simpler-but-slower" implementation
4375   // 4) in order to avoid lots of push/pop operations, code below is heavily
4376   // re-using/re-initializing/compressing register values, which makes code
4377   // larger and a bit less readable, however, most of extra operations are
4378   // issued during loads or branches, so, penalty is minimal
4379 
4380 
4381   // Detailed algorithm:
4382 
4383   // // Algorithm parameters: different code is generated depending on these values.
4384   // bool str1_isL;                // true if pattern string is Latin1
4385   // bool str2_isL;                // true if source string is Latin1
4386   //
4387   // // Constants derived via algorithm parameters:
4388   // str1_chr_size                 // pattern string character size in bytes
4389   // str2_chr_size                 // source string character size in bytes
4390   // str1_chr_shift                // shift used to convert between byte and char sizes for pattern
4391   // str2_chr_shift                // shift used to convert between byte and char sizes for source
4392 
4393   // // Output:
4394   // int result;                   // return pattern index. -1 if not found
4395   // // Input:
4396   // int result;                   // set to 0 by caller
4397   // byte* str1;                   // pattern string
4398   // byte* str2;                   // source string
4399   // long cnt1;                    // pattern string length. Holds positive 32bit value. Also counter of characters-yet-to-be-checked
4400   // long cnt2;                    // source string length. Holds positive 32bit value.
4401   //
4402   // // Temporary variables:
4403   // long tmp1;                    // mask value. lowest bit set at each character place
4404   // long tmp2;
4405   // long tmp3;                    // also referred as "first". Сloned first pattern symbol
4406   // long tmp4;
4407   // long ch1;                     // first 8 bytes of pattern
4408   // long ch2;                     // mostly used to hold parts of source string to check
4409   // long2 v0, v1;                 // long[2] vectors used to convert encodings
4410   //
4411   // // // Search for first pattern character inside source string by using
4412   // // // modified block search algorithm used for finding 1-character-pattern
4413   // // // (see MacroAssembler::string_indexof at label DO1).
4414   // // // It checks whole register for single character of Latin1 or
4415   // // // UTF-16 encoding.
4416   // // //
4417   // // // Assume we are searching C0 inside of 8-byte "data". 1 or 2 bytes of
4418   // // // data related to processed character of source string is called
4419   // // // 'value' below.
4420   // // //
4421   // // // Algorithm steps:
4422   // // //
4423   // // // step 1: replicated = <replicate C0 to size of data (i.e. register)>.
4424   // // //
4425   // // // step 2: xored = replicated ^ data; // has zeroes at C0 match positions
4426   // // //
4427   // // // step 3: maskDelta = xored -  (Latin1 ? 0x0101010101010101 : 0x0001000100010001);
4428   // // // (values in maskDelta will have highest bit set in values,
4429   // // // where same position xored values were equal to:
4430   // // // [0x81, 0xFF], 0x00, 0x01 for Latin1 and
4431   // // // [0x8001, 0xFFFF], 0x0000, 0x0001 for UTF-16)
4432   // // //
4433   // // // step 4: maskedValue = xored | (Latin1 ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4434   // // // (values in maskedValue will have highest bit set to zero for the
4435   // // // following values in xored: [0x00, 0x7F] for Latin1, [0x0000, 0x7FFF] for UTF-16)
4436   // // //
4437   // // // step 5: matchInfo = maskDelta & ~maskedValue;
4438   // // // ("& ~maskedValue" clears all bits in each value except highest bit
4439   // // // at indexes, where xored value was in [0x00, 0x7F] (Latin1) or [0x00, 0x7FFF] (UTF-16).
4440   // // // maskDelta has highest bit set for values, where xored values
4441   // // // was [0x81, 0xFF], 0x00, 0x01 (Latin1) or [0x8001, 0xFFFF], 0x0000, 0x0001 (UTF-16).
4442   // // // Then the only non-zero values in matchInfo are those, where xored
4443   // // // values were 0x00 or 0x01. OxOO is pattern match. 0x01 means this
4444   // // // position has character with difference in lowest bit.
4445   // // // We will recheck first character match later as part of next checks.
4446   // // // Positions where xored values were 0x00 or 0x01 has highest bit set in
4447   // // // matchInfo (0x80 Latin1 and 0x8000 UTF-16).
4448   // // //
4449   // // // step 6: if (matchInfo) == 0 then <no match found inside data>;
4450   // // //         else <check further match>;
4451   // // //
4452   // // // The algorithm skips almost all characters (except one), that
4453   // // // are not C0. 1 branch and 1 load is issued per 8 bytes when
4454   // // // no match was found. Referred as ALGO1
4455   //
4456   //
4457   //   // IndexOf block linear scan:
4458   //
4459   //   PUSH_ON_STACK(tmp1, tmp2, tmp3, tmp4);      // store registers to use as temporary
4460   //   // now initial data can be loaded before further initialization. It is safe
4461   //   // because algorithm is used for >=8 characters strings
4462   //   ch1 = LOAD8(str1);                          // first 8 bytes of pattern string
4463   //   ch2 = LOAD8(str2);                          // first 8 bytes of source string
4464   //   // linear search of cnt1 size pattern in cnt2 string required check of
4465   //   // (cnt2 - cnt1 + 1) = <amount of all possible matches>
4466   //   cnt2 = cnt2 - cnt1;                         // partially calculated <amount of all possible matches>
4467   //   first = ch1 & (str1_isL ? 0xFF : 0xFFFF);   // pattern first character
4468   //   if (str1_isL != str2_isL) v0 = v0 ^ v0;     // zeroing data
4469   //   tmp1 = str2_isL ? 0x0101010101010101 : 0x0001000100010001; // mask
4470   //   first = first * tmp1;                       // now first contains 8 bytes of duplicated first pattern symbols
4471   //   cnt2 = cnt2 - (wordSize/str2_chr_size - 1); // <amount of all possible matches> - wordSize/str2_chr_size;
4472   //   bool smallAmountOfCombinations = cnt2 <= 0; // kept in flags
4473   //   // <amount of combinations> - wordSize/str2_chr_size is positive in case
4474   //   // first symbols of all possible matches doesn't fit in wordSize bytes (i.e. 1 register).
4475   //   if (str1_isL != str2_isL) v1[0] = ch1;
4476   //   if (smallAmountOfCombinations) goto L_SMALL;// check less than 8 bytes among loaded 8
4477   //   ch2 = first ^ ch2;                          // see ALGO1
4478   //   // CONVERT_LATIN1_TO_UTF16 below is implemented by zip1 instruction
4479   //   if (str1_isL != str2_isL) v1 = CONVERT_LATIN1_TO_UTF16(v1, v0);
4480   //   tmp2 = ch2 - tmp1;                          // see ALGO1
4481   //   ch2 = ch2 | str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff; // see ALGO1
4482   //   tmp2 = tmp2 & !ch2;                         // see ALGO1
4483   //   bool noMatch = (tmp2 == 0);                 // kept in flags
4484   //   if (str1_isL != str2_isL) ch1 = v1[0];      // first 8 bytes of converted pattern string
4485   //   if (!noMatch) goto L_HAS_ZERO;              // goto match analyzing
4486   //   cnt2 = cnt2 - wordSize/str2_chr_size;       // reduce counter by amount of checked characters
4487   //   bool smallAmountOfCombinations2 = cnt2 < 0; // kept in flags
4488   //   result += wordSize/str2_chr_size;           // update result index according to checked characters amount
4489   //   str2 += wordSize;                           // update source string pointer to point at unchecked characters
4490   //   if (smallAmountOfCombinations2) goto L_POST_LOOP; // load and check less than 8 bytes
4491   // L_LOOP:                                       // main loop. Scan for C0 in a blocks of 8 bytes
4492   //   ch2 = LOAD8(str2);                          // load next 8 source bytes to check
4493   //   ch2 = first & ch2;                          // see ALGO1
4494   //   tmp2 = ch2 - tmp1;                          // see ALGO1
4495   //   ch2 = ch2 | str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff; // see ALGO1
4496   //   tmp2 = tmp2 & !ch2;                         // see ALGO1
4497   //   bool noMatch2 = (tmp2 == 0);                // kept in flags
4498   //   if (!noMatch2) goto L_HAS_ZERO;             // goto match analyzing
4499   // L_LOOP_PROCEED:                               // no full match found. Continue
4500   //   cnt2 = cnt2 - wordSize/str2_chr_size;       // reduce counter by amount of checked characters
4501   //   bool largeAmountOfCombinations = cnt2 >= 0; // kept in flags
4502   //   str2 += wordSize;                           // update source string pointer to point at unchecked characters
4503   //   result += wordSize/str2_chr_size;           // update result index according to checked characters amount
4504   //   if (largeAmountOfCombinations) goto L_LOOP;
4505   // L_POST_LOOP: // less than 8 bytes lest to load and check for first pattern symbol
4506   //   if (cnt2 <= -wordSize/str2_chr_size) goto NOMATCH; // no data left to check
4507   //   ch2 = LOAD8(str2);                          // load last octet (more than need to be checked, but still within source string)
4508   //   // NB: cnt2 is in [-7, -1] (Latin1) and [-3, -1] (UTF16) = amount of
4509   //   // characters left to check reduced by wordSize/str2_chr_size, which is
4510   //   // -(amount-of-characters-not-to-check). Left shift by
4511   //   // (LogBitePerBytye + str2_chr_shift) below will turn it to amount of bits
4512   //   // to ignore at the end of match info. Can reuse cnt2 to keep this info,
4513   //   // because old cnt2 value is not needed anymore
4514   //   cnt2 = 0 - cnt2 <<  3 + str2_chr_shift;     // amount of bits to ignore
4515   //   ch2 = first ^ ch2;                          // see ALGO1
4516   //   tmp2 = ch2 - tmp1;                          // see ALGO1
4517   //   ch2 = ch2 | str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff; // see ALGO1
4518   //   tmp4 = -1;                                  // mask. all bits set
4519   //   goto L_SMALL_PROCEED;                       // goto common part of handling tails
4520   // L_SMALL:                                      // checks remaining amount of characters in already loaded data (jump here in case small amount of data should be scanned for 1st symbol)
4521   //   // L_SMALL block for different encodings has code to convert Latin1 to UTF16
4522   //   // because jump to this block is done before conversion is finished
4523   //   cnt2 = 0 - cnt2 <<  3 + str2_chr_shift;     // amount of bits to ignore
4524   //   ch2 = first ^ ch2;                          // see ALGO1
4525   //   if (str1_isL != str2_isL) v1 = CONVERT_LATIN1_TO_UTF16(v1, v0);
4526   //   tmp2 = ch2 - tmp1;                          // see ALGO1
4527   //   ch2 = ch2 | str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff; // see ALGO1
4528   //   if (str1_isL != str2_isL) ch1 = v1[0];
4529   // L_SMALL_PROCEED:                              // common code for handling tail of L_LOOP and L_SMALL: scanning less than 8 bytes
4530   //   tmp4 = tmp4 >> cnt2;                        // mask with useless bits set to 0
4531   //   tmp2 = tmp2 & !ch2;                         // see ALGO1
4532   //   tmp2 = tmp2 & tmp4;                         // clear match info in useless bits
4533   //   bool noMatch3 = (tmp2 == 0);                // kept in flags
4534   //   tmp2 = REVERSE_BITS(tmp2);                  // need backward bits order for COUNT_LEADING_ZEROES instruction
4535   //   // NB: after reversing bits, higher bytes represent earlier characters
4536   //   // match info. And instead of 0x80(Latin1) and 0x8000(UTF16) entries for
4537   //   // matching symbols now it is 0x1 in any matching character place
4538   //   if (noMatch3) goto NOMATCH;
4539   // L_SMALL_HAS_ZERO_LOOP:                        // Case of less than 8 bytes were scanned for C0: possible match with C0 found. Check if whole pattern match.
4540   //   // NB: COUT_LEADING_ZEROES below return amount of bits until first
4541   //   // match symbol + <bits in character until lowest bit> (because lowest
4542   //   // bit set in matched symbol place). Then, resulting value is:
4543   //   // match_index * BitsPerCharacter + (BitsPerCharactere-1). Then, shifting
4544   //   // this value right by BitsPerCharacter will result to matching index
4545   //   // within currently considered data chunk
4546   //   tmp4 = COUNT_LEADING_ZEROES(tmp2);
4547   //   bool patternIsShort = (cnt1 <= wordSize/str2_chr_size); // kept in flags
4548   //   if (patternIsShort) goto L_SMALL_CMP_LOOP_LAST_CMP2;
4549   //   if (str2_isL) {
4550   //     str2 = str2 + tmp4 >> LogBitsPerByte;     // source address at matched index
4551   //     ch2 = LOAD8(str2);                        // read first 8 bytes of source string starting from matched symbol. We need to recheck 1st pattern symbol, because it still can be non-matching symbol
4552   //     tmp2 = tmp2 << tmp4;                      // shift off all leading zeroes in match info
4553   //     result = result + tmp4 >> LogBitsPerBytes;// update result to reflect currently checked match index
4554   //     tmp2 = tmp2 << 1;                         // shift off matching bit
4555   //   } else {
4556   //     // NB: calculation of byte offset in source string for UTF16 case:
4557   //     // tmp4 is <amount of bits until first matched symbol> + 15.
4558   //     // Amount of bytes until bit with match information is tmp4 >> LogBitsPerByte.
4559   //     // This amount of bytes is always odd for UTF-16 case and last counted
4560   //     // byte is part of matching symbol. Need to clear lowest bit then to
4561   //     // get amount of bytes until matched character. Then
4562   //     // (tmp4 >> LogBitsPerByte) && 0xE clears lowest bit.
4563   //     ch2 = 0xE;                                // all bits in byte except lowest one are set
4564   //     ch2 = ch2 & tmp4 >> LogBitsPerByte;       // byte offset of matched character
4565   //     ch2 = LOAD8(str2, ch2);                   // load 8 bytes from str2 with ch2 offset. Recheck 1st pattern symbol, because it still can be non-matching symbol
4566   //     tmp2 = tmp2 << tmp4;                      // shift off all leading zeroes in match info
4567   //     result = result + tmp4 >> LogBitsPerBytes + str2_chr_shift; // update result to reflect currently checked match index
4568   //     str2 = str2 + tmp4 >> LogBitsPerByte + str2_chr_shift; // update str2 by adding matched char index (will need to add once more to have byte offset added)
4569   //     tmp2 = tmp2 << 1;                         // shift off matching bit
4570   //     str2 = str2 + tmp4 >> LogBitsPerByte + str2_chr_shift; // now str2 points to matched character address
4571   //   }
4572   //   bool first8BytesNotMatch = (ch1 != ch2);    // kept in flags
4573   //   tmp4 = wordSize/str_chr_size;               // index of next character to check match
4574   //   if (first8BytesNotMatch) goto L_SMALL_CMP_LOOP_NOMATCH;
4575   // L_SMALL_CMP_LOOP:                             // Case of less than 8 bytes were scanned for C0: first 8 bytes of pattern already matched. Proceed to per-character match until the end of pattern
4576   //   // NB: No need in "first" values anymore because it's a small case and
4577   //   // we already checked all first symbol matches. Reuse it.
4578   //   first = (str1_isL) ? LOAD_LATIN1_CHAR(str1, tmp4) : LOAD_UTF16_CHAR(str1, tmp4 << str1_chr_shift); // next pattern symbol to check
4579   //   ch2 = (str2_isL) ? LOAD_LATIN1_CHAR(str2, tmp4) : LOAD_UTF16_CHAR(str2, tmp4 << str2_chr_shift); // next source symbol to check
4580   //   tmp4 = tmp4 + 1;                            // update index to check next
4581   //   bool isLastSymbol = (tmp4 >= cnt1);         // kept in flags
4582   //   if (isLastSymbol) goto L_SMALL_CMP_LOOP_LAST_CMP;
4583   //   bool currentSymbolMatch = (first == ch2);   // kept in flags
4584   //   if (currentSymbolMatch) goto L_SMALL_CMP_LOOP;
4585   // L_SMALL_CMP_LOOP_NOMATCH:                     // Case of less than 8 bytes were scanned for C0: current full match attempt failed.
4586   //   if (tmp2 == 0) NOMATCH;                     // no more first symbol matches found
4587   //   tmp4 = COUNT_LEADING_ZEROES(tmp2);          // find index of matching bit
4588   //   result = result + 1;                        // increase result by 1, because current match fail. Now possible result starts from next symbol
4589   //   str2 = str2 + str_chr_size;                 // advance source pointer, because current match fail. Now possible match checks starts from next symbol
4590   //   goto L_SMALL_HAS_ZERO_LOOP;
4591   // L_SMALL_CMP_LOOP_LAST_CMP:                    // Case of less than 8 bytes were scanned for C0: 1 character left to be checked until full match
4592   //   bool lastCharacterNotMatch = (first != ch2);// kept in flags
4593   //   if (lastCharacterNotMatch) goto L_SMALL_CMP_LOOP_NOMATCH;
4594   //   goto DONE;
4595   // L_SMALL_CMP_LOOP_LAST_CMP2:                   // Case when string size is 8 bytes. No need in internal per-character loop
4596   //   if (str2_isL) {
4597   //     str2 = str2 + tmp4 >> LogBitsPerByte;     // source address at matched index
4598   //     ch2 = LOAD8(str2);                        // read first 8 bytes of source string starting from matched symbol. Recheck 1st pattern symbol, because it still can be non-matching symbol
4599   //     tmp2 = tmp2 << tmp4;                      // shift off all leading zeroes in match info
4600   //     result = result + tmp4 >> LogBitsPerBytes;// update result to reflect currently checked match index
4601   //     tmp2 = tmp2 << 1;                         // shift off matching bit
4602   //   } else {
4603   //     ch2 = 0xE;                                // all bits in byte except lowest one are set
4604   //     ch2 = ch2 & tmp4 >> LogBitsPerByte;       // byte offset of matched character
4605   //     ch2 = LOAD8(str2, ch2);                   // load 8 bytes from str2 with ch2 offset. Recheck 1st pattern symbol, because it still can be non-matching symbol
4606   //     tmp2 = tmp2 << tmp4;                      // shift off all leading zeroes in match info
4607   //     result = result + tmp4 >> LogBitsPerBytes + str2_chr_shift; // update result to reflect currently checked match index
4608   //     str2 = str2 + tmp4 >> LogBitsPerByte + str2_chr_shift; // update str2 by adding matched char index (will need to add once more to have byte offset added)
4609   //     tmp2 = tmp2 << 1;                         // shift off matching bit
4610   //     str2 = str2 + tmp4 >> LogBitsPerByte + str2_chr_shift; // now str2 points to matched character address
4611   //   }
4612   //   bool notMatching8Bytes = (ch1 != ch2);       // kept in flags
4613   //   if (notMatching8Bytes) goto L_SMALL_CMP_LOOP_NOMATCH;
4614   //   goto DONE;
4615   // L_HAS_ZERO:                                   // main L_LOOP found C0 match candidate(s) in current 8-byte block
4616   //   tmp2 = REVERSE_BITS(tmp2);
4617   //   tmp4 = COUNT_LEADING_ZEROES(tmp2);
4618   //   // Perform compression of counters(cnt1 and cnt2). Both are 64-bit
4619   //   // registers holding 32-bit values. Then it can be compressed into one
4620   //   // 64-bit register. Store low 32 bits of cnt1 into high 32 bits of cnt2.
4621   //   // This way cnt1 register can be reused and restored after. old cnt1
4622   //   // value still can be accessed as cnt2 >> 32.
4623   //   cnt2 = cnt2 | cnt2 << 32;
4624   //   // NB: loop (L_HAS_ZERO_LOOP) below iterates through first symbol match
4625   //   // info in tmp2 until full pattern match is met. Result value between
4626   //   // iterations should be increased by <match index> + 1. That's why
4627   //   // pre-condition for 1st iteration is to reduce result by 1. This will
4628   //   // make loop simpler.
4629   //   result = result - 1;
4630   // L_HAS_ZERO_LOOP:                              // Loop over all C0 match candidates found in current 8-byte block
4631   //   // NB: code is this L_HAS_ZERO_LOOP code path is mostly same as in
4632   //   // L_SMALL_HAS_ZERO_LOOP, but we need more registers to keep intermediate
4633   //   // data for this long case(like, keeping cloned first symbol in "first").
4634   //   // Because of that this code has slightly different registers usage with
4635   //   // data compression described above and respective data usage.
4636   //   cnt1 = wordSize/str2_chr_size;
4637   //   bool only8BytesToCompare = (cnt1 >= cnt2 >> 32); // kept in flag
4638   //   if (only8BytesToCompare) goto L_CMP_LOOP_LAST_CMP2;
4639   //   // if-else blocks below has a number of same instructions, but due to
4640   //   // slightly different logic it is pipelined differently.
4641   //   if (str2_isL) {
4642   //     ch2 = tmp4 >> (LogBitsPerByte + str2_chr_shift); // char index
4643   //     ch2 = LOAD8(str2, ch2);                   // Need to recheck 1st pattern symbol, because it still can be non-matching symbol
4644   //     tmp2 = tmp2 << tmp4;                      // shift off leading zeroes
4645   //     str2 = str2 + tmp4 >> (LogBitsPerBytes + str2_chr_shift); // increase str2 by <match index> to have start address of currently checked match combination
4646   //     tmp4 = tmp4 + 1;                          // tmp4 was 8*<match index> + 7. Now it 8 * (<match index> + 1)
4647   //     result = result + tmp4 >> (LogBitsPerByte + str2_chr_shift); // result += (<match index> + 1)
4648   //     tmp = tmp << 1;                           // shift of matching bit
4649   //     tmp4 = wordSize/str2_chr_size;            // index of next character to check
4650   //   } else {
4651   //     ch2 = 0xE;
4652   //     ch2 = ch2 & tmp4 >> LogBitsPerByte;
4653   //     ch2 = LOAD8(str2, ch2);                   // Need to recheck 1st pattern symbol, because it still can be non-matching symbol
4654   //     tmp2 = tmp2 << tmp4;
4655   //     // NB: tmp4 >> LogBitsPerByte will return <match index> * str2_chr_size + 1,
4656   //     // which is offset inside matched 2-byte character. Calculate offset of
4657   //     // next character and then step back by 1 character. Since tmp4 is
4658   //     // 16*<match index> + 15, we can add 1 for tmp4 to become 16 * (<match index> + 1)
4659   //     // which is bit offset of next character. This value is also used to updated
4660   //     // "result" by (<match index> + 1).
4661   //     tmp4 = tmp4 + 1;                          // tmp4 was 16*<match index> + 15. Now it is 16 * (<match index> + 1)
4662   //     result = result + tmp4 >> (LogBitsPerByte + str2_chr_shift);
4663   //     str2 = str2 + tmp4 >> LogBitsPerByte;     // advance str2 pointer by (<match index> + 1) characters. Will return by 1 character back later to have start address of currently checked match combination
4664   //     tmp2 = tmp2 << 1                          // shift off matching bit
4665   //     tmp4 = wordSize/str2_chr_size;            // index of next character to check
4666   //     str2 = str2 - str2_chr_size;              // return by 1 character back as promised above
4667   //   }
4668   //   bool notMatching8Bytes2 = (ch1 != ch2);     // kept in flags
4669   //   tmp4 = wordSize/str2_chr_size;              // index of next character to check
4670   //   if (notMatching8Bytes2) goto L_CMP_LOOP_NOMATCH;
4671   // L_CMP_LOOP:                                   // Per-character loop to check full pattern match of current match candidate
4672   //   cnt1 = (str1_isL) ? LOAD_LATIN1_CHAR(str1, tmp4) : LOAD_UTF16_CHAR(str1, tmp4 << str1_chr_shift); // next pattern symbol to check
4673   //   ch2 = (str2_isL) ? LOAD_LATIN1_CHAR(str2, tmp4) : LOAD_UTF16_CHAR(str2, tmp4 << str2_chr_shift); // next source symbol to check
4674   //   tmp4 = tmp4 + 1; // update index
4675   //   bool isLastSymbol2 = (tmp4 >= cnt2 >> 32);  // kept in flags
4676   //   if (isLastSymbol2) goto L_CMP_LOOP_LAST_CMP;
4677   //   bool currentSymbolMatch2 = (cnt1 == ch2);   // kept in flags
4678   //   if (currentSymbolMatch2) goto L_CMP_LOOP;
4679   // L_CMP_LOOP_NOMATCH:                           // current match candidate check failed
4680   //   if (tmp2 == 0) goto L_HAS_ZERO_LOOP_NOMATCH;// no more match found in current byte octet
4681   //   tmp4 = COUNT_LEADING_ZERO(tmp2);            // find next match bit index
4682   //   str2 = str2 + str2_chr_size;                // next match attempt begins from next start character. update str2 pointer
4683   //   goto L_HAS_ZERO_LOOP;
4684   // L_CMP_LOOP_LAST_CMP:                          // checking last character until full pattern match
4685   //   bool lastCharacterNotMatch2 = (cnt1 != ch2);// kept in flags
4686   //   if (lastCharacterNotMatch2) goto L_CMP_LOOP_NOMATCH;
4687   //   goto DONE;
4688   // L_CMP_LOOP_LAST_CMP2:                         // L_LOOP found match and pattern string is 8 bytes. First compare of 8 bytes is the last one for current match candidate.
4689   //   if (str2_isL) {
4690   //     ch2 = tmp4 >> (LogBitsPerByte + str2_chr_shift); // match char index
4691   //     ch2 = LOAD8(str2, ch2);                   // read whole match candidate. Recheck 1st pattern symbol, because it still can be non-matching symbol
4692   //     tmp2 = tmp2 << tmp4;                      // shift off leading zeroes
4693   //     str2 = str2 + tmp4 >> (LogBitsPerByte + str2_chr_shift); // str2 now points to matched 1st character
4694   //     tmp4 = tmp4 + 1;                          // <match bit index + 1>
4695   //     result = result + tmp4 >> (LogBitsPerByte + str2_chr_shift);
4696   //     tmp2 = tmp2 << 1;                         // shift off matching bit
4697   //   } else {
4698   //     ch2 = 0xE;
4699   //     ch2 = ch2 & tmp4 >> LogBitsPerByte;       // byte offset
4700   //     ch2 = LOAD8(str2, ch2);                   // Recheck 1st pattern symbol, because it still can be non-matching symbol
4701   //     tmp2 = tmp2 << tmp4;                      // shift off leading zeroes
4702   //     // NB: tmp4 >> LogBitsPerByte will return <match index> * str2_chr_size + 1,
4703   //     // which is offset inside matched 2-byte character. Calculate offset of
4704   //     // next character and then step back by 1 character. Since tmp4 is
4705   //     // 16*<match index> + 15, we can add 1 for tmp4 to become 16 * (<match index> + 1)
4706   //     // which is bit offset of next character. This value is also used to updated
4707   //     // "result" by (<match index> + 1).
4708   //     tmp4 = tmp4 + 1;                          // <match bit index + 1>
4709   //     result = result + tmp4 >> (LogBitsPerByte + str2_chr_shift);
4710   //     str2 = str2 + tmp4 >> LogBitsPerByte;     // now points to character after 1st matched one
4711   //     tmp2 = tmp2 << 1;                         // shift off matching bit
4712   //     str2 = str2 - str2_chr_size;              // points to matched 1st character
4713   //   }
4714   //   bool notMatching8Bytes3 = (ch1 != ch2);
4715   //   if (notMatching8Bytes3) goto L_CMP_LOOP_NOMATCH;
4716   //   goto DONE;
4717   // L_HAS_ZERO_LOOP_NOMATCH:                      // Current match candidate check for full match failed. Restore state for further work.
4718   //  // Current octet has no more matches candidates. Restore following data:
4719   //  // 1) result index. Index was wordSize/str2_chr_size * N until
4720   //  // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, then
4721   //  // result was increased at max by wordSize/str2_chr_size - 1. Reset these
4722   //  // bits to original state then. Clear 2 lower bits for UTF16 case and
4723   //  // lower 3 bits for Latin1 case.
4724   //  // 2) restore cnt1 and cnt2 values from "compressed" cnt2
4725   //  // 3) advance str2 to represent next str2 octet. result & 7 (Latin1)
4726   //  // and result & 3 (UTF-16) is index of last analyzed substring inside
4727   //  // current octet. str2 points to this character. Need to update it to
4728   //  // point at next octet. Increase by octet is done in a loop we're
4729   //  // returning to (L_LOOP). Then we need to reset str2 here to current octet
4730   //  // address. Resetting bits in a same manner as result value won't work
4731   //  // because address can be unaligned. Then we have to calculate exact value
4732   //  // to subtract from str2. It's <last match index inside octet> << str2_chr_shift
4733   //  tmp2 = result & (wordSize/str2_chr_size - 1); // last analyzed character index inside octet
4734   //  cnt2 = cnt2 >> 32;                           // uncompress cnt1
4735   //  // NB: ZERO_LOW_BITS(param1, param2) below is zeroing lowest bits of
4736   //  // param1 from bit number 0 to bit number param2 inclusive. Implemented
4737   //  // as bfm instruction
4738   //  ZERO_LOW_BITS(result, 2 - str2_chr_shift);
4739   //  str2 = str2 - tmp2 << str2_chr_shift;
4740   //  cnt2 = cnt2 & 0xFFFFFFFF;                    // uncompress cnt2
4741   //  goto L_LOOP_PROCEED;
4742   // NOMATCH:
4743   //   result = -1;
4744   // DONE:
4745   //   POP_FROM_STACK(tmp1, tmp2, tmp3, tmp4);      // restore temporary register values
4746   //   return;
4747   address generate_string_indexof_linear(bool str1_isL, bool str2_isL) {
4748     const char* stubName = str1_isL
4749         ? (str2_isL ? "indexof_linear_ll" : "indexof_linear_ul")
4750         : "indexof_linear_uu";
4751     __ align(CodeEntryAlignment);
4752     StubCodeMark mark(this, "StubRoutines", stubName);
4753     address entry = __ pc();
4754 
4755     int str1_chr_size = str1_isL ? 1 : 2;
4756     int str2_chr_size = str2_isL ? 1 : 2;
4757     int str1_chr_shift = str1_isL ? 0 : 1;
4758     int str2_chr_shift = str2_isL ? 0 : 1;
4759     bool isL = str1_isL && str2_isL;
4760    // parameters
4761     Register result = r0, str2 = r1, cnt1 = r2, str1 = r3, cnt2 = r4;
4762     // temporary registers
4763     Register tmp1 = r20, tmp2 = r21, tmp3 = r22, tmp4 = r23;
4764     RegSet spilled_regs = RegSet::range(tmp1, tmp4);
4765     // redefinitions
4766     Register ch1 = rscratch1, ch2 = rscratch2, first = tmp3;
4767 
4768     __ push(spilled_regs, sp);
4769     Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO,
4770         L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
4771         L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
4772         L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
4773         L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
4774         L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
4775     // Read whole register from str1. It is safe, because length >=8 here
4776     __ ldr(ch1, Address(str1));
4777     // Read whole register from str2. It is safe, because length >=8 here
4778     __ ldr(ch2, Address(str2));
4779     __ sub(cnt2, cnt2, cnt1);
4780     __ andr(first, ch1, str1_isL ? 0xFF : 0xFFFF);
4781     if (str1_isL != str2_isL) {
4782       __ eor(v0, __ T16B, v0, v0);
4783     }
4784     __ mov(tmp1, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
4785     __ mul(first, first, tmp1);
4786     // check if we have less than 1 register to check
4787     __ subs(cnt2, cnt2, wordSize/str2_chr_size - 1);
4788     if (str1_isL != str2_isL) {
4789       __ fmovd(v1, ch1);
4790     }
4791     __ br(__ LE, L_SMALL);
4792     __ eor(ch2, first, ch2);
4793     if (str1_isL != str2_isL) {
4794       __ zip1(v1, __ T16B, v1, v0);
4795     }
4796     __ sub(tmp2, ch2, tmp1);
4797     __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4798     __ bics(tmp2, tmp2, ch2);
4799     if (str1_isL != str2_isL) {


< prev index next >