4357 ldp(rfp, lr, Address(sp, framesize - 2 * wordSize)); 4358 add(sp, sp, framesize); 4359 } else { 4360 if (framesize < ((1 << 12) + 2 * wordSize)) 4361 add(sp, sp, framesize - 2 * wordSize); 4362 else { 4363 mov(rscratch1, framesize - 2 * wordSize); 4364 add(sp, sp, rscratch1); 4365 } 4366 ldp(rfp, lr, Address(post(sp, 2 * wordSize))); 4367 } 4368 } 4369 4370 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); 4371 4372 // Search for str1 in str2 and return index or -1 4373 void MacroAssembler::string_indexof(Register str2, Register str1, 4374 Register cnt2, Register cnt1, 4375 Register tmp1, Register tmp2, 4376 Register tmp3, Register tmp4, 4377 int icnt1, Register result, int ae) { 4378 Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH; 4379 4380 Register ch1 = rscratch1; 4381 Register ch2 = rscratch2; 4382 Register cnt1tmp = tmp1; 4383 Register cnt2tmp = tmp2; 4384 Register cnt1_neg = cnt1; 4385 Register cnt2_neg = cnt2; 4386 Register result_tmp = tmp4; 4387 4388 bool isL = ae == StrIntrinsicNode::LL; 4389 4390 bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; 4391 bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; 4392 int str1_chr_shift = str1_isL ? 0:1; 4393 int str2_chr_shift = str2_isL ? 0:1; 4394 int str1_chr_size = str1_isL ? 1:2; 4395 int str2_chr_size = str2_isL ? 1:2; 4396 chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb : 4397 (chr_insn)&MacroAssembler::ldrh; 4398 chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb : 4399 (chr_insn)&MacroAssembler::ldrh; 4400 chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw; 4401 chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr; 4402 4403 // Note, inline_string_indexOf() generates checks: 4404 // if (substr.count > string.count) return -1; 4405 // if (substr.count == 0) return 0; 4406 4407 // We have two strings, a source string in str2, cnt2 and a pattern string 4408 // in str1, cnt1. Find the 1st occurence of pattern in source or return -1. 4409 4410 // For larger pattern and source we use a simplified Boyer Moore algorithm. 4411 // With a small pattern and source we use linear scan. 4412 4413 if (icnt1 == -1) { 4414 cmp(cnt1, 256); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256 4415 ccmp(cnt1, 8, 0b0000, LO); // Can't handle skip >= 256 because we use 4416 br(LO, LINEARSEARCH); // a byte array. 4417 cmp(cnt1, cnt2, LSR, 2); // Source must be 4 * pattern for BM 4418 br(HS, LINEARSEARCH); 4419 } 4420 4421 // The Boyer Moore alogorithm is based on the description here:- 4422 // 4423 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm 4424 // 4425 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule 4426 // and the 'Good Suffix' rule. 4427 // 4428 // These rules are essentially heuristics for how far we can shift the 4429 // pattern along the search string. 4430 // 4431 // The implementation here uses the 'Bad Character' rule only because of the 4432 // complexity of initialisation for the 'Good Suffix' rule. 4433 // 4434 // This is also known as the Boyer-Moore-Horspool algorithm:- 4435 // 4436 // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm 4437 // 4438 // #define ASIZE 128 4439 // 4440 // int bm(unsigned char *x, int m, unsigned char *y, int n) { 4441 // int i, j; 4442 // unsigned c; 4443 // unsigned char bc[ASIZE]; 4444 // 4445 // /* Preprocessing */ 4446 // for (i = 0; i < ASIZE; ++i) 4447 // bc[i] = 0; 4448 // for (i = 0; i < m - 1; ) { 4449 // c = x[i]; 4450 // ++i; 4451 // if (c < ASIZE) bc[c] = i; 4452 // } 4453 // 4454 // /* Searching */ 4455 // j = 0; 4456 // while (j <= n - m) { 4457 // c = y[i+j]; 4458 // if (x[m-1] == c) 4459 // for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i); 4460 // if (i < 0) return j; 4461 // if (c < ASIZE) 4462 // j = j - bc[y[j+m-1]] + m; 4463 // else 4464 // j += 1; // Advance by 1 only if char >= ASIZE 4465 // } 4466 // } 4467 4468 if (icnt1 == -1) { 4469 BIND(BM); 4470 4471 Label ZLOOP, BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP; 4472 Label BMADV, BMMATCH, BMCHECKEND; 4473 4474 Register cnt1end = tmp2; 4475 Register str2end = cnt2; 4476 Register skipch = tmp2; 4477 4478 // Restrict ASIZE to 128 to reduce stack space/initialisation. 4479 // The presence of chars >= ASIZE in the target string does not affect 4480 // performance, but we must be careful not to initialise them in the stack 4481 // array. 4482 // The presence of chars >= ASIZE in the source string may adversely affect 4483 // performance since we can only advance by one when we encounter one. 4484 4485 stp(zr, zr, pre(sp, -128)); 4486 for (int i = 1; i < 8; i++) 4487 stp(zr, zr, Address(sp, i*16)); 4488 4489 mov(cnt1tmp, 0); 4490 sub(cnt1end, cnt1, 1); 4491 BIND(BCLOOP); 4492 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift))); 4493 cmp(ch1, 128); 4494 add(cnt1tmp, cnt1tmp, 1); 4495 br(HS, BCSKIP); 4496 strb(cnt1tmp, Address(sp, ch1)); 4497 BIND(BCSKIP); 4498 cmp(cnt1tmp, cnt1end); 4499 br(LT, BCLOOP); 4500 4501 mov(result_tmp, str2); 4502 4503 sub(cnt2, cnt2, cnt1); 4504 add(str2end, str2, cnt2, LSL, str2_chr_shift); 4505 BIND(BMLOOPSTR2); 4506 sub(cnt1tmp, cnt1, 1); 4507 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift))); 4508 (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift))); 4509 cmp(ch1, skipch); 4510 br(NE, BMSKIP); 4511 subs(cnt1tmp, cnt1tmp, 1); 4512 br(LT, BMMATCH); 4513 BIND(BMLOOPSTR1); 4514 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift))); 4515 (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift))); 4516 cmp(ch1, ch2); 4517 br(NE, BMSKIP); 4518 subs(cnt1tmp, cnt1tmp, 1); 4519 br(GE, BMLOOPSTR1); 4520 BIND(BMMATCH); 4521 sub(result, str2, result_tmp); 4522 if (!str2_isL) lsr(result, result, 1); 4523 add(sp, sp, 128); 4524 b(DONE); 4525 BIND(BMADV); 4526 add(str2, str2, str2_chr_size); 4527 b(BMCHECKEND); 4528 BIND(BMSKIP); 4529 cmp(skipch, 128); 4530 br(HS, BMADV); 4531 ldrb(ch2, Address(sp, skipch)); 4532 add(str2, str2, cnt1, LSL, str2_chr_shift); 4533 sub(str2, str2, ch2, LSL, str2_chr_shift); 4534 BIND(BMCHECKEND); 4535 cmp(str2, str2end); 4536 br(LE, BMLOOPSTR2); 4537 add(sp, sp, 128); 4538 b(NOMATCH); 4539 } 4540 4541 BIND(LINEARSEARCH); 4542 { 4543 Label DO1, DO2, DO3; 4544 4545 Register str2tmp = tmp2; 4546 Register first = tmp3; 4547 4548 if (icnt1 == -1) 4549 { 4550 Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; 4551 4552 cmp(cnt1, str1_isL == str2_isL ? 4 : 2); 4553 br(LT, DOSHORT); 4554 4555 sub(cnt2, cnt2, cnt1); 4556 mov(result_tmp, cnt2); 4557 4558 lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift))); 4559 lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift))); 4560 sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift); 4561 sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift); 4562 (this->*str1_load_1chr)(first, Address(str1, cnt1_neg)); 4563 4564 BIND(FIRST_LOOP); 4565 (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg)); 4566 cmp(first, ch2); 4567 br(EQ, STR1_LOOP); 4568 BIND(STR2_NEXT); 4569 adds(cnt2_neg, cnt2_neg, str2_chr_size); 4570 br(LE, FIRST_LOOP); 4571 b(NOMATCH); 4572 4573 BIND(STR1_LOOP); 4574 adds(cnt1tmp, cnt1_neg, str1_chr_size); 4575 add(cnt2tmp, cnt2_neg, str2_chr_size); 4576 br(GE, MATCH); 4577 4578 BIND(STR1_NEXT); 4579 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp)); 4580 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp)); 4581 cmp(ch1, ch2); 4582 br(NE, STR2_NEXT); 4583 adds(cnt1tmp, cnt1tmp, str1_chr_size); 4584 add(cnt2tmp, cnt2tmp, str2_chr_size); 4585 br(LT, STR1_NEXT); 4586 b(MATCH); 4587 4588 BIND(DOSHORT); 4589 if (str1_isL == str2_isL) { 4590 cmp(cnt1, 2); 4591 br(LT, DO1); 4592 br(GT, DO3); 4593 } 4594 } 4595 4596 if (icnt1 == 4) { 4597 Label CH1_LOOP; 4598 4599 (this->*load_4chr)(ch1, str1); 4600 sub(cnt2, cnt2, 4); 4601 mov(result_tmp, cnt2); 4602 lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift))); 4603 sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift); 4604 4605 BIND(CH1_LOOP); 4606 (this->*load_4chr)(ch2, Address(str2, cnt2_neg)); 4607 cmp(ch1, ch2); 4608 br(EQ, MATCH); 4609 adds(cnt2_neg, cnt2_neg, str2_chr_size); 4610 br(LE, CH1_LOOP); 4611 b(NOMATCH); 4612 } 4613 4614 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) { 4615 Label CH1_LOOP; 4616 4617 BIND(DO2); 4618 (this->*load_2chr)(ch1, str1); 4619 sub(cnt2, cnt2, 2); 4620 mov(result_tmp, cnt2); 4621 lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift))); 4622 sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift); 4623 4624 BIND(CH1_LOOP); 4625 (this->*load_2chr)(ch2, Address(str2, cnt2_neg)); 4626 cmp(ch1, ch2); 4627 br(EQ, MATCH); 4628 adds(cnt2_neg, cnt2_neg, str2_chr_size); 4629 br(LE, CH1_LOOP); 4630 b(NOMATCH); 4631 } 4632 4633 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) { 4634 Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; 4635 4636 BIND(DO3); 4637 (this->*load_2chr)(first, str1); 4638 (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size)); 4639 4640 sub(cnt2, cnt2, 3); 4641 mov(result_tmp, cnt2); 4642 lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift))); 4643 sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift); 4644 4645 BIND(FIRST_LOOP); 4646 (this->*load_2chr)(ch2, Address(str2, cnt2_neg)); 4647 cmpw(first, ch2); 4648 br(EQ, STR1_LOOP); 4649 BIND(STR2_NEXT); 4650 adds(cnt2_neg, cnt2_neg, str2_chr_size); 4651 br(LE, FIRST_LOOP); 4652 b(NOMATCH); 4653 4654 BIND(STR1_LOOP); 4655 add(cnt2tmp, cnt2_neg, 2*str2_chr_size); 4656 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp)); 4657 cmp(ch1, ch2); 4658 br(NE, STR2_NEXT); 4659 b(MATCH); 4660 } 4661 4662 if (icnt1 == -1 || icnt1 == 1) { 4663 Label CH1_LOOP, HAS_ZERO; 4664 Label DO1_SHORT, DO1_LOOP; 4665 4666 BIND(DO1); 4667 (this->*str1_load_1chr)(ch1, str1); 4668 cmp(cnt2, 8); 4669 br(LT, DO1_SHORT); 4670 4671 if (str2_isL) { 4672 if (!str1_isL) { 4673 tst(ch1, 0xff00); 4674 br(NE, NOMATCH); 4675 } 4676 orr(ch1, ch1, ch1, LSL, 8); 4677 } 4678 orr(ch1, ch1, ch1, LSL, 16); 4679 orr(ch1, ch1, ch1, LSL, 32); 4680 4681 sub(cnt2, cnt2, 8/str2_chr_size); 4682 mov(result_tmp, cnt2); 4683 lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift))); 4684 sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift); 4685 4686 mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001); 4687 BIND(CH1_LOOP); 4688 ldr(ch2, Address(str2, cnt2_neg)); 4689 eor(ch2, ch1, ch2); 4690 sub(tmp1, ch2, tmp3); 4691 orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); 4692 bics(tmp1, tmp1, tmp2); 4693 br(NE, HAS_ZERO); 4694 adds(cnt2_neg, cnt2_neg, 8); 4695 br(LT, CH1_LOOP); 4696 4697 cmp(cnt2_neg, 8); 4698 mov(cnt2_neg, 0); 4699 br(LT, CH1_LOOP); 4700 b(NOMATCH); 4701 4702 BIND(HAS_ZERO); 4703 rev(tmp1, tmp1); 4704 clz(tmp1, tmp1); 4705 add(cnt2_neg, cnt2_neg, tmp1, LSR, 3); 4706 b(MATCH); | 4357 ldp(rfp, lr, Address(sp, framesize - 2 * wordSize)); 4358 add(sp, sp, framesize); 4359 } else { 4360 if (framesize < ((1 << 12) + 2 * wordSize)) 4361 add(sp, sp, framesize - 2 * wordSize); 4362 else { 4363 mov(rscratch1, framesize - 2 * wordSize); 4364 add(sp, sp, rscratch1); 4365 } 4366 ldp(rfp, lr, Address(post(sp, 2 * wordSize))); 4367 } 4368 } 4369 4370 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); 4371 4372 // Search for str1 in str2 and return index or -1 4373 void MacroAssembler::string_indexof(Register str2, Register str1, 4374 Register cnt2, Register cnt1, 4375 Register tmp1, Register tmp2, 4376 Register tmp3, Register tmp4, 4377 Register tmp5, Register tmp6, 4378 int icnt1, Register result, int ae) { 4379 // NOTE: tmp5, tmp6 can be zr depending on specific method version 4380 Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH; 4381 4382 Register ch1 = rscratch1; 4383 Register ch2 = rscratch2; 4384 Register cnt1tmp = tmp1; 4385 Register cnt2tmp = tmp2; 4386 Register cnt1_neg = cnt1; 4387 Register cnt2_neg = cnt2; 4388 Register result_tmp = tmp4; 4389 4390 bool isL = ae == StrIntrinsicNode::LL; 4391 4392 bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; 4393 bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; 4394 int str1_chr_shift = str1_isL ? 0:1; 4395 int str2_chr_shift = str2_isL ? 0:1; 4396 int str1_chr_size = str1_isL ? 1:2; 4397 int str2_chr_size = str2_isL ? 1:2; 4398 chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb : 4399 (chr_insn)&MacroAssembler::ldrh; 4400 chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb : 4401 (chr_insn)&MacroAssembler::ldrh; 4402 chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw; 4403 chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr; 4404 4405 // Note, inline_string_indexOf() generates checks: 4406 // if (substr.count > string.count) return -1; 4407 // if (substr.count == 0) return 0; 4408 4409 // We have two strings, a source string in str2, cnt2 and a pattern string 4410 // in str1, cnt1. Find the 1st occurence of pattern in source or return -1. 4411 4412 // For larger pattern and source we use a simplified Boyer Moore algorithm. 4413 // With a small pattern and source we use linear scan. 4414 4415 if (icnt1 == -1) { 4416 sub(result_tmp, cnt2, cnt1); 4417 cmp(cnt1, 8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256 4418 br(LT, LINEARSEARCH); 4419 dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty 4420 cmp(cnt1, 256); 4421 lsr(tmp1, cnt2, 2); 4422 ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM 4423 br(GE, LINEARSTUB); 4424 } 4425 4426 // The Boyer Moore alogorithm is based on the description here:- 4427 // 4428 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm 4429 // 4430 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule 4431 // and the 'Good Suffix' rule. 4432 // 4433 // These rules are essentially heuristics for how far we can shift the 4434 // pattern along the search string. 4435 // 4436 // The implementation here uses the 'Bad Character' rule only because of the 4437 // complexity of initialisation for the 'Good Suffix' rule. 4438 // 4439 // This is also known as the Boyer-Moore-Horspool algorithm:- 4440 // 4441 // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm 4442 // 4443 // This particular implementation has few java-specific optimizations. 4444 // 4445 // #define ASIZE 256 4446 // 4447 // int bm(unsigned char *x, int m, unsigned char *y, int n) { 4448 // int i, j; 4449 // unsigned c; 4450 // unsigned char bc[ASIZE]; 4451 // 4452 // /* Preprocessing */ 4453 // for (i = 0; i < ASIZE; ++i) 4454 // bc[i] = m; 4455 // for (i = 0; i < m - 1; ) { 4456 // c = x[i]; 4457 // ++i; 4458 // // c < 256 for Latin1 string, so, no need for branch 4459 // #ifdef PATTERN_STRING_IS_LATIN1 4460 // bc[c] = m - i; 4461 // #else 4462 // if (c < ASIZE) bc[c] = m - i; 4463 // #endif 4464 // } 4465 // 4466 // /* Searching */ 4467 // j = 0; 4468 // while (j <= n - m) { 4469 // c = y[i+j]; 4470 // if (x[m-1] == c) 4471 // for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i); 4472 // if (i < 0) return j; 4473 // // c < 256 for Latin1 string, so, no need for branch 4474 // #ifdef SOURCE_STRING_IS_LATIN1 4475 // // LL case: (c< 256) always true. Remove branch 4476 // j += bc[y[j+m-1]]; 4477 // #endif 4478 // #ifndef PATTERN_STRING_IS_UTF 4479 // // UU case: need if (c<ASIZE) check. Skip 1 character if not. 4480 // if (c < ASIZE) 4481 // j += bc[y[j+m-1]]; 4482 // else 4483 // j += 1 4484 // #endif 4485 // #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF 4486 // // UL case: need if (c<ASIZE) check. Skip <pattern length> if not. 4487 // if (c < ASIZE) 4488 // j += bc[y[j+m-1]]; 4489 // else 4490 // j += m 4491 // #endif 4492 // } 4493 // } 4494 4495 if (icnt1 == -1) { 4496 Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, 4497 BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; 4498 Register cnt1end = tmp2; 4499 Register str2end = cnt2; 4500 Register skipch = tmp2; 4501 4502 // str1 length is >=8, so, we can read at least 1 register for cases when 4503 // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for 4504 // UL case. We'll re-read last character in inner pre-loop code to have 4505 // single outer pre-loop load 4506 const int firstStep = isL ? 7 : 3; 4507 4508 const int ASIZE = 256; 4509 const int STORED_BYTES = 32; // amount of bytes stored per instruction 4510 sub(sp, sp, ASIZE); 4511 mov(tmp5, ASIZE/STORED_BYTES); // loop iterations 4512 mov(ch1, sp); 4513 BIND(BM_INIT_LOOP); 4514 stpq(v0, v0, Address(post(ch1, STORED_BYTES))); 4515 subs(tmp5, tmp5, 1); 4516 br(GT, BM_INIT_LOOP); 4517 4518 sub(cnt1tmp, cnt1, 1); 4519 mov(tmp5, str2); 4520 add(str2end, str2, result_tmp, LSL, str2_chr_shift); 4521 sub(ch2, cnt1, 1); 4522 mov(tmp3, str1); 4523 BIND(BCLOOP); 4524 (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size))); 4525 if (!str1_isL) { 4526 cmp(ch1, ASIZE); 4527 br(HS, BCSKIP); 4528 } 4529 strb(ch2, Address(sp, ch1)); 4530 BIND(BCSKIP); 4531 subs(ch2, ch2, 1); 4532 br(GT, BCLOOP); 4533 4534 add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1 4535 if (str1_isL == str2_isL) { 4536 // load last 8 bytes (8LL/4UU symbols) 4537 ldr(tmp6, Address(tmp6, -wordSize)); 4538 } else { 4539 ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols) 4540 // convert Latin1 to UTF. We'll have to wait until load completed, but 4541 // it's still faster than per-character loads+checks 4542 lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1] 4543 ubfx(ch1, tmp6, 8, 8); // str1[N-2] 4544 ubfx(ch2, tmp6, 16, 8); // str1[N-3] 4545 andr(tmp6, tmp6, 0xFF); // str1[N-4] 4546 orr(ch2, ch1, ch2, LSL, 16); 4547 orr(tmp6, tmp6, tmp3, LSL, 48); 4548 orr(tmp6, tmp6, ch2, LSL, 16); 4549 } 4550 BIND(BMLOOPSTR2); 4551 (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift))); 4552 sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8 4553 if (str1_isL == str2_isL) { 4554 // re-init tmp3. It's for free because it's executed in parallel with 4555 // load above. Alternative is to initialize it before loop, but it'll 4556 // affect performance on in-order systems with 2 or more ld/st pipelines 4557 lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size)); 4558 } 4559 if (!isL) { // UU/UL case 4560 lsl(ch2, cnt1tmp, 1); // offset in bytes 4561 } 4562 cmp(tmp3, skipch); 4563 br(NE, BMSKIP); 4564 ldr(ch2, Address(str2, isL ? cnt1tmp : ch2)); 4565 mov(ch1, tmp6); 4566 if (isL) { 4567 b(BMLOOPSTR1_AFTER_LOAD); 4568 } else { 4569 sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 4570 b(BMLOOPSTR1_CMP); 4571 } 4572 BIND(BMLOOPSTR1); 4573 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift))); 4574 (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift))); 4575 BIND(BMLOOPSTR1_AFTER_LOAD); 4576 subs(cnt1tmp, cnt1tmp, 1); 4577 br(LT, BMLOOPSTR1_LASTCMP); 4578 BIND(BMLOOPSTR1_CMP); 4579 cmp(ch1, ch2); 4580 br(EQ, BMLOOPSTR1); 4581 BIND(BMSKIP); 4582 if (!isL) { 4583 // if we've met UTF symbol while searching Latin1 pattern, then we can 4584 // skip cnt1 symbols 4585 if (str1_isL != str2_isL) { 4586 mov(result_tmp, cnt1); 4587 } else { 4588 mov(result_tmp, 1); 4589 } 4590 cmp(skipch, ASIZE); 4591 br(HS, BMADV); 4592 } 4593 ldrb(result_tmp, Address(sp, skipch)); // load skip distance 4594 BIND(BMADV); 4595 sub(cnt1tmp, cnt1, 1); 4596 add(str2, str2, result_tmp, LSL, str2_chr_shift); 4597 cmp(str2, str2end); 4598 br(LE, BMLOOPSTR2); 4599 add(sp, sp, ASIZE); 4600 b(NOMATCH); 4601 BIND(BMLOOPSTR1_LASTCMP); 4602 cmp(ch1, ch2); 4603 br(NE, BMSKIP); 4604 BIND(BMMATCH); 4605 sub(result, str2, tmp5); 4606 if (!str2_isL) lsr(result, result, 1); 4607 add(sp, sp, ASIZE); 4608 b(DONE); 4609 4610 BIND(LINEARSTUB); 4611 cmp(cnt1, 16); // small patterns still should be handled by simple algorithm 4612 br(LT, LINEAR_MEDIUM); 4613 mov(result, zr); 4614 RuntimeAddress stub = NULL; 4615 if (isL) { 4616 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll()); 4617 assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); 4618 } else if (str1_isL) { 4619 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul()); 4620 assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); 4621 } else { 4622 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu()); 4623 assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); 4624 } 4625 trampoline_call(stub); 4626 b(DONE); 4627 } 4628 4629 BIND(LINEARSEARCH); 4630 { 4631 Label DO1, DO2, DO3; 4632 4633 Register str2tmp = tmp2; 4634 Register first = tmp3; 4635 4636 if (icnt1 == -1) 4637 { 4638 Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; 4639 4640 cmp(cnt1, str1_isL == str2_isL ? 4 : 2); 4641 br(LT, DOSHORT); 4642 BIND(LINEAR_MEDIUM); 4643 (this->*str1_load_1chr)(first, Address(str1)); 4644 lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift))); 4645 sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift); 4646 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 4647 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 4648 4649 BIND(FIRST_LOOP); 4650 (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg)); 4651 cmp(first, ch2); 4652 br(EQ, STR1_LOOP); 4653 BIND(STR2_NEXT); 4654 adds(cnt2_neg, cnt2_neg, str2_chr_size); 4655 br(LE, FIRST_LOOP); 4656 b(NOMATCH); 4657 4658 BIND(STR1_LOOP); 4659 adds(cnt1tmp, cnt1_neg, str1_chr_size); 4660 add(cnt2tmp, cnt2_neg, str2_chr_size); 4661 br(GE, MATCH); 4662 4663 BIND(STR1_NEXT); 4664 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp)); 4665 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp)); 4666 cmp(ch1, ch2); 4667 br(NE, STR2_NEXT); 4668 adds(cnt1tmp, cnt1tmp, str1_chr_size); 4669 add(cnt2tmp, cnt2tmp, str2_chr_size); 4670 br(LT, STR1_NEXT); 4671 b(MATCH); 4672 4673 BIND(DOSHORT); 4674 if (str1_isL == str2_isL) { 4675 cmp(cnt1, 2); 4676 br(LT, DO1); 4677 br(GT, DO3); 4678 } 4679 } 4680 4681 if (icnt1 == 4) { 4682 Label CH1_LOOP; 4683 4684 (this->*load_4chr)(ch1, str1); 4685 sub(result_tmp, cnt2, 4); 4686 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 4687 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 4688 4689 BIND(CH1_LOOP); 4690 (this->*load_4chr)(ch2, Address(str2, cnt2_neg)); 4691 cmp(ch1, ch2); 4692 br(EQ, MATCH); 4693 adds(cnt2_neg, cnt2_neg, str2_chr_size); 4694 br(LE, CH1_LOOP); 4695 b(NOMATCH); 4696 } 4697 4698 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) { 4699 Label CH1_LOOP; 4700 4701 BIND(DO2); 4702 (this->*load_2chr)(ch1, str1); 4703 if (icnt1 == 2) { 4704 sub(result_tmp, cnt2, 2); 4705 } 4706 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 4707 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 4708 BIND(CH1_LOOP); 4709 (this->*load_2chr)(ch2, Address(str2, cnt2_neg)); 4710 cmp(ch1, ch2); 4711 br(EQ, MATCH); 4712 adds(cnt2_neg, cnt2_neg, str2_chr_size); 4713 br(LE, CH1_LOOP); 4714 b(NOMATCH); 4715 } 4716 4717 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) { 4718 Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; 4719 4720 BIND(DO3); 4721 (this->*load_2chr)(first, str1); 4722 (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size)); 4723 if (icnt1 == 3) { 4724 sub(result_tmp, cnt2, 3); 4725 } 4726 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 4727 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 4728 BIND(FIRST_LOOP); 4729 (this->*load_2chr)(ch2, Address(str2, cnt2_neg)); 4730 cmpw(first, ch2); 4731 br(EQ, STR1_LOOP); 4732 BIND(STR2_NEXT); 4733 adds(cnt2_neg, cnt2_neg, str2_chr_size); 4734 br(LE, FIRST_LOOP); 4735 b(NOMATCH); 4736 4737 BIND(STR1_LOOP); 4738 add(cnt2tmp, cnt2_neg, 2*str2_chr_size); 4739 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp)); 4740 cmp(ch1, ch2); 4741 br(NE, STR2_NEXT); 4742 b(MATCH); 4743 } 4744 4745 if (icnt1 == -1 || icnt1 == 1) { 4746 Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP; 4747 4748 BIND(DO1); 4749 (this->*str1_load_1chr)(ch1, str1); 4750 cmp(cnt2, 8); 4751 br(LT, DO1_SHORT); 4752 4753 sub(result_tmp, cnt2, 8/str2_chr_size); 4754 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 4755 mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001); 4756 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 4757 4758 if (str2_isL) { 4759 orr(ch1, ch1, ch1, LSL, 8); 4760 } 4761 orr(ch1, ch1, ch1, LSL, 16); 4762 orr(ch1, ch1, ch1, LSL, 32); 4763 BIND(CH1_LOOP); 4764 ldr(ch2, Address(str2, cnt2_neg)); 4765 eor(ch2, ch1, ch2); 4766 sub(tmp1, ch2, tmp3); 4767 orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); 4768 bics(tmp1, tmp1, tmp2); 4769 br(NE, HAS_ZERO); 4770 adds(cnt2_neg, cnt2_neg, 8); 4771 br(LT, CH1_LOOP); 4772 4773 cmp(cnt2_neg, 8); 4774 mov(cnt2_neg, 0); 4775 br(LT, CH1_LOOP); 4776 b(NOMATCH); 4777 4778 BIND(HAS_ZERO); 4779 rev(tmp1, tmp1); 4780 clz(tmp1, tmp1); 4781 add(cnt2_neg, cnt2_neg, tmp1, LSR, 3); 4782 b(MATCH); |