4340 chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
4341 (chr_insn)&MacroAssembler::ldrh;
4342 chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
4343 chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
4344
4345 // Note, inline_string_indexOf() generates checks:
4346 // if (substr.count > string.count) return -1;
4347 // if (substr.count == 0) return 0;
4348
4349 // We have two strings, a source string in str2, cnt2 and a pattern string
4350 // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
4351
4352 // For larger pattern and source we use a simplified Boyer Moore algorithm.
4353 // With a small pattern and source we use linear scan.
4354
4355 if (icnt1 == -1) {
4356 sub(result_tmp, cnt2, cnt1);
4357 cmp(cnt1, 8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
4358 br(LT, LINEARSEARCH);
4359 dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
4360 cmp(cnt1, 256);
4361 lsr(tmp1, cnt2, 2);
4362 ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
4363 br(GE, LINEARSTUB);
4364 }
4365
4366 // The Boyer Moore alogorithm is based on the description here:-
4367 //
4368 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
4369 //
4370 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule
4371 // and the 'Good Suffix' rule.
4372 //
4373 // These rules are essentially heuristics for how far we can shift the
4374 // pattern along the search string.
4375 //
4376 // The implementation here uses the 'Bad Character' rule only because of the
4377 // complexity of initialisation for the 'Good Suffix' rule.
4378 //
4379 // This is also known as the Boyer-Moore-Horspool algorithm:-
4380 //
4446 const int firstStep = isL ? 7 : 3;
4447
4448 const int ASIZE = 256;
4449 const int STORED_BYTES = 32; // amount of bytes stored per instruction
4450 sub(sp, sp, ASIZE);
4451 mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
4452 mov(ch1, sp);
4453 BIND(BM_INIT_LOOP);
4454 stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
4455 subs(tmp5, tmp5, 1);
4456 br(GT, BM_INIT_LOOP);
4457
4458 sub(cnt1tmp, cnt1, 1);
4459 mov(tmp5, str2);
4460 add(str2end, str2, result_tmp, LSL, str2_chr_shift);
4461 sub(ch2, cnt1, 1);
4462 mov(tmp3, str1);
4463 BIND(BCLOOP);
4464 (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
4465 if (!str1_isL) {
4466 cmp(ch1, ASIZE);
4467 br(HS, BCSKIP);
4468 }
4469 strb(ch2, Address(sp, ch1));
4470 BIND(BCSKIP);
4471 subs(ch2, ch2, 1);
4472 br(GT, BCLOOP);
4473
4474 add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
4475 if (str1_isL == str2_isL) {
4476 // load last 8 bytes (8LL/4UU symbols)
4477 ldr(tmp6, Address(tmp6, -wordSize));
4478 } else {
4479 ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
4480 // convert Latin1 to UTF. We'll have to wait until load completed, but
4481 // it's still faster than per-character loads+checks
4482 lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
4483 ubfx(ch1, tmp6, 8, 8); // str1[N-2]
4484 ubfx(ch2, tmp6, 16, 8); // str1[N-3]
4485 andr(tmp6, tmp6, 0xFF); // str1[N-4]
4486 orr(ch2, ch1, ch2, LSL, 16);
4510 b(BMLOOPSTR1_CMP);
4511 }
4512 BIND(BMLOOPSTR1);
4513 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
4514 (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
4515 BIND(BMLOOPSTR1_AFTER_LOAD);
4516 subs(cnt1tmp, cnt1tmp, 1);
4517 br(LT, BMLOOPSTR1_LASTCMP);
4518 BIND(BMLOOPSTR1_CMP);
4519 cmp(ch1, ch2);
4520 br(EQ, BMLOOPSTR1);
4521 BIND(BMSKIP);
4522 if (!isL) {
4523 // if we've met UTF symbol while searching Latin1 pattern, then we can
4524 // skip cnt1 symbols
4525 if (str1_isL != str2_isL) {
4526 mov(result_tmp, cnt1);
4527 } else {
4528 mov(result_tmp, 1);
4529 }
4530 cmp(skipch, ASIZE);
4531 br(HS, BMADV);
4532 }
4533 ldrb(result_tmp, Address(sp, skipch)); // load skip distance
4534 BIND(BMADV);
4535 sub(cnt1tmp, cnt1, 1);
4536 add(str2, str2, result_tmp, LSL, str2_chr_shift);
4537 cmp(str2, str2end);
4538 br(LE, BMLOOPSTR2);
4539 add(sp, sp, ASIZE);
4540 b(NOMATCH);
4541 BIND(BMLOOPSTR1_LASTCMP);
4542 cmp(ch1, ch2);
4543 br(NE, BMSKIP);
4544 BIND(BMMATCH);
4545 sub(result, str2, tmp5);
4546 if (!str2_isL) lsr(result, result, 1);
4547 add(sp, sp, ASIZE);
4548 b(DONE);
4549
4550 BIND(LINEARSTUB);
|
4340 chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
4341 (chr_insn)&MacroAssembler::ldrh;
4342 chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
4343 chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
4344
4345 // Note, inline_string_indexOf() generates checks:
4346 // if (substr.count > string.count) return -1;
4347 // if (substr.count == 0) return 0;
4348
4349 // We have two strings, a source string in str2, cnt2 and a pattern string
4350 // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
4351
4352 // For larger pattern and source we use a simplified Boyer Moore algorithm.
4353 // With a small pattern and source we use linear scan.
4354
4355 if (icnt1 == -1) {
4356 sub(result_tmp, cnt2, cnt1);
4357 cmp(cnt1, 8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
4358 br(LT, LINEARSEARCH);
4359 dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
4360 subs(zr, cnt1, 256);
4361 lsr(tmp1, cnt2, 2);
4362 ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
4363 br(GE, LINEARSTUB);
4364 }
4365
4366 // The Boyer Moore alogorithm is based on the description here:-
4367 //
4368 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
4369 //
4370 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule
4371 // and the 'Good Suffix' rule.
4372 //
4373 // These rules are essentially heuristics for how far we can shift the
4374 // pattern along the search string.
4375 //
4376 // The implementation here uses the 'Bad Character' rule only because of the
4377 // complexity of initialisation for the 'Good Suffix' rule.
4378 //
4379 // This is also known as the Boyer-Moore-Horspool algorithm:-
4380 //
4446 const int firstStep = isL ? 7 : 3;
4447
4448 const int ASIZE = 256;
4449 const int STORED_BYTES = 32; // amount of bytes stored per instruction
4450 sub(sp, sp, ASIZE);
4451 mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
4452 mov(ch1, sp);
4453 BIND(BM_INIT_LOOP);
4454 stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
4455 subs(tmp5, tmp5, 1);
4456 br(GT, BM_INIT_LOOP);
4457
4458 sub(cnt1tmp, cnt1, 1);
4459 mov(tmp5, str2);
4460 add(str2end, str2, result_tmp, LSL, str2_chr_shift);
4461 sub(ch2, cnt1, 1);
4462 mov(tmp3, str1);
4463 BIND(BCLOOP);
4464 (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
4465 if (!str1_isL) {
4466 subs(zr, ch1, ASIZE);
4467 br(HS, BCSKIP);
4468 }
4469 strb(ch2, Address(sp, ch1));
4470 BIND(BCSKIP);
4471 subs(ch2, ch2, 1);
4472 br(GT, BCLOOP);
4473
4474 add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
4475 if (str1_isL == str2_isL) {
4476 // load last 8 bytes (8LL/4UU symbols)
4477 ldr(tmp6, Address(tmp6, -wordSize));
4478 } else {
4479 ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
4480 // convert Latin1 to UTF. We'll have to wait until load completed, but
4481 // it's still faster than per-character loads+checks
4482 lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
4483 ubfx(ch1, tmp6, 8, 8); // str1[N-2]
4484 ubfx(ch2, tmp6, 16, 8); // str1[N-3]
4485 andr(tmp6, tmp6, 0xFF); // str1[N-4]
4486 orr(ch2, ch1, ch2, LSL, 16);
4510 b(BMLOOPSTR1_CMP);
4511 }
4512 BIND(BMLOOPSTR1);
4513 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
4514 (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
4515 BIND(BMLOOPSTR1_AFTER_LOAD);
4516 subs(cnt1tmp, cnt1tmp, 1);
4517 br(LT, BMLOOPSTR1_LASTCMP);
4518 BIND(BMLOOPSTR1_CMP);
4519 cmp(ch1, ch2);
4520 br(EQ, BMLOOPSTR1);
4521 BIND(BMSKIP);
4522 if (!isL) {
4523 // if we've met UTF symbol while searching Latin1 pattern, then we can
4524 // skip cnt1 symbols
4525 if (str1_isL != str2_isL) {
4526 mov(result_tmp, cnt1);
4527 } else {
4528 mov(result_tmp, 1);
4529 }
4530 subs(zr, skipch, ASIZE);
4531 br(HS, BMADV);
4532 }
4533 ldrb(result_tmp, Address(sp, skipch)); // load skip distance
4534 BIND(BMADV);
4535 sub(cnt1tmp, cnt1, 1);
4536 add(str2, str2, result_tmp, LSL, str2_chr_shift);
4537 cmp(str2, str2end);
4538 br(LE, BMLOOPSTR2);
4539 add(sp, sp, ASIZE);
4540 b(NOMATCH);
4541 BIND(BMLOOPSTR1_LASTCMP);
4542 cmp(ch1, ch2);
4543 br(NE, BMSKIP);
4544 BIND(BMMATCH);
4545 sub(result, str2, tmp5);
4546 if (!str2_isL) lsr(result, result, 1);
4547 add(sp, sp, ASIZE);
4548 b(DONE);
4549
4550 BIND(LINEARSTUB);
|