jdk-jdk Sdiff src/hotspot/cpu/aarch64

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

4340   chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
4341                                       (chr_insn)&MacroAssembler::ldrh;
4342   chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
4343   chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
4344 
4345   // Note, inline_string_indexOf() generates checks:
4346   // if (substr.count > string.count) return -1;
4347   // if (substr.count == 0) return 0;
4348 
4349   // We have two strings, a source string in str2, cnt2 and a pattern string
4350   // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
4351 
4352   // For larger pattern and source we use a simplified Boyer Moore algorithm.
4353   // With a small pattern and source we use linear scan.
4354 
4355   if (icnt1 == -1) {
4356     sub(result_tmp, cnt2, cnt1);
4357     cmp(cnt1, 8);             // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
4358     br(LT, LINEARSEARCH);
4359     dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
4360     cmp(cnt1, 256);
4361     lsr(tmp1, cnt2, 2);
4362     ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
4363     br(GE, LINEARSTUB);
4364   }
4365 
4366 // The Boyer Moore alogorithm is based on the description here:-
4367 //
4368 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
4369 //
4370 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule
4371 // and the 'Good Suffix' rule.
4372 //
4373 // These rules are essentially heuristics for how far we can shift the
4374 // pattern along the search string.
4375 //
4376 // The implementation here uses the 'Bad Character' rule only because of the
4377 // complexity of initialisation for the 'Good Suffix' rule.
4378 //
4379 // This is also known as the Boyer-Moore-Horspool algorithm:-
4380 //

4446     const int firstStep = isL ? 7 : 3;
4447 
4448     const int ASIZE = 256;
4449     const int STORED_BYTES = 32; // amount of bytes stored per instruction
4450     sub(sp, sp, ASIZE);
4451     mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
4452     mov(ch1, sp);
4453     BIND(BM_INIT_LOOP);
4454       stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
4455       subs(tmp5, tmp5, 1);
4456       br(GT, BM_INIT_LOOP);
4457 
4458       sub(cnt1tmp, cnt1, 1);
4459       mov(tmp5, str2);
4460       add(str2end, str2, result_tmp, LSL, str2_chr_shift);
4461       sub(ch2, cnt1, 1);
4462       mov(tmp3, str1);
4463     BIND(BCLOOP);
4464       (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
4465       if (!str1_isL) {
4466         cmp(ch1, ASIZE);
4467         br(HS, BCSKIP);
4468       }
4469       strb(ch2, Address(sp, ch1));
4470     BIND(BCSKIP);
4471       subs(ch2, ch2, 1);
4472       br(GT, BCLOOP);
4473 
4474       add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
4475       if (str1_isL == str2_isL) {
4476         // load last 8 bytes (8LL/4UU symbols)
4477         ldr(tmp6, Address(tmp6, -wordSize));
4478       } else {
4479         ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
4480         // convert Latin1 to UTF. We'll have to wait until load completed, but
4481         // it's still faster than per-character loads+checks
4482         lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
4483         ubfx(ch1, tmp6, 8, 8); // str1[N-2]
4484         ubfx(ch2, tmp6, 16, 8); // str1[N-3]
4485         andr(tmp6, tmp6, 0xFF); // str1[N-4]
4486         orr(ch2, ch1, ch2, LSL, 16);

4510         b(BMLOOPSTR1_CMP);
4511       }
4512     BIND(BMLOOPSTR1);
4513       (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
4514       (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
4515     BIND(BMLOOPSTR1_AFTER_LOAD);
4516       subs(cnt1tmp, cnt1tmp, 1);
4517       br(LT, BMLOOPSTR1_LASTCMP);
4518     BIND(BMLOOPSTR1_CMP);
4519       cmp(ch1, ch2);
4520       br(EQ, BMLOOPSTR1);
4521     BIND(BMSKIP);
4522       if (!isL) {
4523         // if we've met UTF symbol while searching Latin1 pattern, then we can
4524         // skip cnt1 symbols
4525         if (str1_isL != str2_isL) {
4526           mov(result_tmp, cnt1);
4527         } else {
4528           mov(result_tmp, 1);
4529         }
4530         cmp(skipch, ASIZE);
4531         br(HS, BMADV);
4532       }
4533       ldrb(result_tmp, Address(sp, skipch)); // load skip distance
4534     BIND(BMADV);
4535       sub(cnt1tmp, cnt1, 1);
4536       add(str2, str2, result_tmp, LSL, str2_chr_shift);
4537       cmp(str2, str2end);
4538       br(LE, BMLOOPSTR2);
4539       add(sp, sp, ASIZE);
4540       b(NOMATCH);
4541     BIND(BMLOOPSTR1_LASTCMP);
4542       cmp(ch1, ch2);
4543       br(NE, BMSKIP);
4544     BIND(BMMATCH);
4545       sub(result, str2, tmp5);
4546       if (!str2_isL) lsr(result, result, 1);
4547       add(sp, sp, ASIZE);
4548       b(DONE);
4549 
4550     BIND(LINEARSTUB);

4340   chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
4341                                       (chr_insn)&MacroAssembler::ldrh;
4342   chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
4343   chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
4344 
4345   // Note, inline_string_indexOf() generates checks:
4346   // if (substr.count > string.count) return -1;
4347   // if (substr.count == 0) return 0;
4348 
4349   // We have two strings, a source string in str2, cnt2 and a pattern string
4350   // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
4351 
4352   // For larger pattern and source we use a simplified Boyer Moore algorithm.
4353   // With a small pattern and source we use linear scan.
4354 
4355   if (icnt1 == -1) {
4356     sub(result_tmp, cnt2, cnt1);
4357     cmp(cnt1, 8);             // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
4358     br(LT, LINEARSEARCH);
4359     dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
4360     subs(zr, cnt1, 256);
4361     lsr(tmp1, cnt2, 2);
4362     ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
4363     br(GE, LINEARSTUB);
4364   }
4365 
4366 // The Boyer Moore alogorithm is based on the description here:-
4367 //
4368 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
4369 //
4370 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule
4371 // and the 'Good Suffix' rule.
4372 //
4373 // These rules are essentially heuristics for how far we can shift the
4374 // pattern along the search string.
4375 //
4376 // The implementation here uses the 'Bad Character' rule only because of the
4377 // complexity of initialisation for the 'Good Suffix' rule.
4378 //
4379 // This is also known as the Boyer-Moore-Horspool algorithm:-
4380 //

4446     const int firstStep = isL ? 7 : 3;
4447 
4448     const int ASIZE = 256;
4449     const int STORED_BYTES = 32; // amount of bytes stored per instruction
4450     sub(sp, sp, ASIZE);
4451     mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
4452     mov(ch1, sp);
4453     BIND(BM_INIT_LOOP);
4454       stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
4455       subs(tmp5, tmp5, 1);
4456       br(GT, BM_INIT_LOOP);
4457 
4458       sub(cnt1tmp, cnt1, 1);
4459       mov(tmp5, str2);
4460       add(str2end, str2, result_tmp, LSL, str2_chr_shift);
4461       sub(ch2, cnt1, 1);
4462       mov(tmp3, str1);
4463     BIND(BCLOOP);
4464       (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
4465       if (!str1_isL) {
4466         subs(zr, ch1, ASIZE);
4467         br(HS, BCSKIP);
4468       }
4469       strb(ch2, Address(sp, ch1));
4470     BIND(BCSKIP);
4471       subs(ch2, ch2, 1);
4472       br(GT, BCLOOP);
4473 
4474       add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
4475       if (str1_isL == str2_isL) {
4476         // load last 8 bytes (8LL/4UU symbols)
4477         ldr(tmp6, Address(tmp6, -wordSize));
4478       } else {
4479         ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
4480         // convert Latin1 to UTF. We'll have to wait until load completed, but
4481         // it's still faster than per-character loads+checks
4482         lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
4483         ubfx(ch1, tmp6, 8, 8); // str1[N-2]
4484         ubfx(ch2, tmp6, 16, 8); // str1[N-3]
4485         andr(tmp6, tmp6, 0xFF); // str1[N-4]
4486         orr(ch2, ch1, ch2, LSL, 16);

4510         b(BMLOOPSTR1_CMP);
4511       }
4512     BIND(BMLOOPSTR1);
4513       (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
4514       (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
4515     BIND(BMLOOPSTR1_AFTER_LOAD);
4516       subs(cnt1tmp, cnt1tmp, 1);
4517       br(LT, BMLOOPSTR1_LASTCMP);
4518     BIND(BMLOOPSTR1_CMP);
4519       cmp(ch1, ch2);
4520       br(EQ, BMLOOPSTR1);
4521     BIND(BMSKIP);
4522       if (!isL) {
4523         // if we've met UTF symbol while searching Latin1 pattern, then we can
4524         // skip cnt1 symbols
4525         if (str1_isL != str2_isL) {
4526           mov(result_tmp, cnt1);
4527         } else {
4528           mov(result_tmp, 1);
4529         }
4530         subs(zr, skipch, ASIZE);
4531         br(HS, BMADV);
4532       }
4533       ldrb(result_tmp, Address(sp, skipch)); // load skip distance
4534     BIND(BMADV);
4535       sub(cnt1tmp, cnt1, 1);
4536       add(str2, str2, result_tmp, LSL, str2_chr_shift);
4537       cmp(str2, str2end);
4538       br(LE, BMLOOPSTR2);
4539       add(sp, sp, ASIZE);
4540       b(NOMATCH);
4541     BIND(BMLOOPSTR1_LASTCMP);
4542       cmp(ch1, ch2);
4543       br(NE, BMSKIP);
4544     BIND(BMMATCH);
4545       sub(result, str2, tmp5);
4546       if (!str2_isL) lsr(result, result, 1);
4547       add(sp, sp, ASIZE);
4548       b(DONE);
4549 
4550     BIND(LINEARSTUB);

< prev index next >