< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page




4357     ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4358     add(sp, sp, framesize);
4359   } else {
4360     if (framesize < ((1 << 12) + 2 * wordSize))
4361       add(sp, sp, framesize - 2 * wordSize);
4362     else {
4363       mov(rscratch1, framesize - 2 * wordSize);
4364       add(sp, sp, rscratch1);
4365     }
4366     ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
4367   }
4368 }
4369 
4370 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
4371 
4372 // Search for str1 in str2 and return index or -1
4373 void MacroAssembler::string_indexof(Register str2, Register str1,
4374                                     Register cnt2, Register cnt1,
4375                                     Register tmp1, Register tmp2,
4376                                     Register tmp3, Register tmp4,

4377                                     int icnt1, Register result, int ae) {
4378   Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH;

4379 
4380   Register ch1 = rscratch1;
4381   Register ch2 = rscratch2;
4382   Register cnt1tmp = tmp1;
4383   Register cnt2tmp = tmp2;
4384   Register cnt1_neg = cnt1;
4385   Register cnt2_neg = cnt2;
4386   Register result_tmp = tmp4;
4387 
4388   bool isL = ae == StrIntrinsicNode::LL;
4389 
4390   bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
4391   bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
4392   int str1_chr_shift = str1_isL ? 0:1;
4393   int str2_chr_shift = str2_isL ? 0:1;
4394   int str1_chr_size = str1_isL ? 1:2;
4395   int str2_chr_size = str2_isL ? 1:2;
4396   chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
4397                                       (chr_insn)&MacroAssembler::ldrh;
4398   chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
4399                                       (chr_insn)&MacroAssembler::ldrh;
4400   chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
4401   chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
4402 
4403   // Note, inline_string_indexOf() generates checks:
4404   // if (substr.count > string.count) return -1;
4405   // if (substr.count == 0) return 0;
4406 
4407 // We have two strings, a source string in str2, cnt2 and a pattern string
4408 // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
4409 
4410 // For larger pattern and source we use a simplified Boyer Moore algorithm.
4411 // With a small pattern and source we use linear scan.
4412 
4413   if (icnt1 == -1) {
4414     cmp(cnt1, 256);             // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
4415     ccmp(cnt1, 8, 0b0000, LO);  // Can't handle skip >= 256 because we use
4416     br(LO, LINEARSEARCH);       // a byte array.
4417     cmp(cnt1, cnt2, LSR, 2);    // Source must be 4 * pattern for BM
4418     br(HS, LINEARSEARCH);



4419   }
4420 
4421 // The Boyer Moore alogorithm is based on the description here:-
4422 //
4423 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
4424 //
4425 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule
4426 // and the 'Good Suffix' rule.
4427 //
4428 // These rules are essentially heuristics for how far we can shift the
4429 // pattern along the search string.
4430 //
4431 // The implementation here uses the 'Bad Character' rule only because of the
4432 // complexity of initialisation for the 'Good Suffix' rule.
4433 //
4434 // This is also known as the Boyer-Moore-Horspool algorithm:-
4435 //
4436 // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
4437 //
4438 // #define ASIZE 128


4439 //
4440 //    int bm(unsigned char *x, int m, unsigned char *y, int n) {
4441 //       int i, j;
4442 //       unsigned c;
4443 //       unsigned char bc[ASIZE];
4444 //
4445 //       /* Preprocessing */
4446 //       for (i = 0; i < ASIZE; ++i)
4447 //          bc[i] = 0;
4448 //       for (i = 0; i < m - 1; ) {
4449 //          c = x[i];
4450 //          ++i;
4451 //          if (c < ASIZE) bc[c] = i;





4452 //       }
4453 //
4454 //       /* Searching */
4455 //       j = 0;
4456 //       while (j <= n - m) {
4457 //          c = y[i+j];
4458 //          if (x[m-1] == c)
4459 //            for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
4460 //          if (i < 0) return j;














4461 //          if (c < ASIZE)
4462 //            j = j - bc[y[j+m-1]] + m;
4463 //          else
4464 //            j += 1; // Advance by 1 only if char >= ASIZE

4465 //       }
4466 //    }
4467 
4468   if (icnt1 == -1) {
4469     BIND(BM);
4470 
4471     Label ZLOOP, BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP;
4472     Label BMADV, BMMATCH, BMCHECKEND;
4473 
4474     Register cnt1end = tmp2;
4475     Register str2end = cnt2;
4476     Register skipch = tmp2;
4477 
4478     // Restrict ASIZE to 128 to reduce stack space/initialisation.
4479     // The presence of chars >= ASIZE in the target string does not affect
4480     // performance, but we must be careful not to initialise them in the stack
4481     // array.
4482     // The presence of chars >= ASIZE in the source string may adversely affect
4483     // performance since we can only advance by one when we encounter one.
4484 
4485       stp(zr, zr, pre(sp, -128));
4486       for (int i = 1; i < 8; i++)
4487           stp(zr, zr, Address(sp, i*16));





4488 
4489       mov(cnt1tmp, 0);
4490       sub(cnt1end, cnt1, 1);



4491     BIND(BCLOOP);
4492       (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
4493       cmp(ch1, 128);
4494       add(cnt1tmp, cnt1tmp, 1);
4495       br(HS, BCSKIP);
4496       strb(cnt1tmp, Address(sp, ch1));

4497     BIND(BCSKIP);
4498       cmp(cnt1tmp, cnt1end);
4499       br(LT, BCLOOP);
4500 
4501       mov(result_tmp, str2);
4502 
4503       sub(cnt2, cnt2, cnt1);
4504       add(str2end, str2, cnt2, LSL, str2_chr_shift);














4505     BIND(BMLOOPSTR2);
4506       sub(cnt1tmp, cnt1, 1);
4507       (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
4508       (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
4509       cmp(ch1, skipch);










4510       br(NE, BMSKIP);
4511       subs(cnt1tmp, cnt1tmp, 1);
4512       br(LT, BMMATCH);






4513     BIND(BMLOOPSTR1);
4514       (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
4515       (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
4516       cmp(ch1, ch2);
4517       br(NE, BMSKIP);
4518       subs(cnt1tmp, cnt1tmp, 1);
4519       br(GE, BMLOOPSTR1);
4520     BIND(BMMATCH);
4521       sub(result, str2, result_tmp);
4522       if (!str2_isL) lsr(result, result, 1);
4523       add(sp, sp, 128);
4524       b(DONE);
4525     BIND(BMADV);
4526       add(str2, str2, str2_chr_size);
4527       b(BMCHECKEND);
4528     BIND(BMSKIP);
4529       cmp(skipch, 128);








4530       br(HS, BMADV);
4531       ldrb(ch2, Address(sp, skipch));
4532       add(str2, str2, cnt1, LSL, str2_chr_shift);
4533       sub(str2, str2, ch2, LSL, str2_chr_shift);
4534     BIND(BMCHECKEND);

4535       cmp(str2, str2end);
4536       br(LE, BMLOOPSTR2);
4537       add(sp, sp, 128);
4538       b(NOMATCH);


























4539   }
4540 
4541   BIND(LINEARSEARCH);
4542   {
4543     Label DO1, DO2, DO3;
4544 
4545     Register str2tmp = tmp2;
4546     Register first = tmp3;
4547 
4548     if (icnt1 == -1)
4549     {
4550         Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
4551 
4552         cmp(cnt1, str1_isL == str2_isL ? 4 : 2);
4553         br(LT, DOSHORT);
4554 
4555         sub(cnt2, cnt2, cnt1);
4556         mov(result_tmp, cnt2);
4557 
4558         lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
4559         lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
4560         sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
4561         sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
4562         (this->*str1_load_1chr)(first, Address(str1, cnt1_neg));
4563 
4564       BIND(FIRST_LOOP);
4565         (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
4566         cmp(first, ch2);
4567         br(EQ, STR1_LOOP);
4568       BIND(STR2_NEXT);
4569         adds(cnt2_neg, cnt2_neg, str2_chr_size);
4570         br(LE, FIRST_LOOP);
4571         b(NOMATCH);
4572 
4573       BIND(STR1_LOOP);
4574         adds(cnt1tmp, cnt1_neg, str1_chr_size);
4575         add(cnt2tmp, cnt2_neg, str2_chr_size);
4576         br(GE, MATCH);
4577 
4578       BIND(STR1_NEXT);
4579         (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
4580         (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
4581         cmp(ch1, ch2);
4582         br(NE, STR2_NEXT);
4583         adds(cnt1tmp, cnt1tmp, str1_chr_size);
4584         add(cnt2tmp, cnt2tmp, str2_chr_size);
4585         br(LT, STR1_NEXT);
4586         b(MATCH);
4587 
4588       BIND(DOSHORT);
4589       if (str1_isL == str2_isL) {
4590         cmp(cnt1, 2);
4591         br(LT, DO1);
4592         br(GT, DO3);
4593       }
4594     }
4595 
4596     if (icnt1 == 4) {
4597       Label CH1_LOOP;
4598 
4599         (this->*load_4chr)(ch1, str1);
4600         sub(cnt2, cnt2, 4);
4601         mov(result_tmp, cnt2);
4602         lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
4603         sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
4604 
4605       BIND(CH1_LOOP);
4606         (this->*load_4chr)(ch2, Address(str2, cnt2_neg));
4607         cmp(ch1, ch2);
4608         br(EQ, MATCH);
4609         adds(cnt2_neg, cnt2_neg, str2_chr_size);
4610         br(LE, CH1_LOOP);
4611         b(NOMATCH);
4612     }
4613 
4614     if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
4615       Label CH1_LOOP;
4616 
4617       BIND(DO2);
4618         (this->*load_2chr)(ch1, str1);
4619         sub(cnt2, cnt2, 2);
4620         mov(result_tmp, cnt2);
4621         lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
4622         sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
4623 
4624       BIND(CH1_LOOP);
4625         (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
4626         cmp(ch1, ch2);
4627         br(EQ, MATCH);
4628         adds(cnt2_neg, cnt2_neg, str2_chr_size);
4629         br(LE, CH1_LOOP);
4630         b(NOMATCH);
4631     }
4632 
4633     if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
4634       Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
4635 
4636       BIND(DO3);
4637         (this->*load_2chr)(first, str1);
4638         (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
4639 
4640         sub(cnt2, cnt2, 3);
4641         mov(result_tmp, cnt2);
4642         lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
4643         sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
4644 
4645       BIND(FIRST_LOOP);
4646         (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
4647         cmpw(first, ch2);
4648         br(EQ, STR1_LOOP);
4649       BIND(STR2_NEXT);
4650         adds(cnt2_neg, cnt2_neg, str2_chr_size);
4651         br(LE, FIRST_LOOP);
4652         b(NOMATCH);
4653 
4654       BIND(STR1_LOOP);
4655         add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
4656         (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
4657         cmp(ch1, ch2);
4658         br(NE, STR2_NEXT);
4659         b(MATCH);
4660     }
4661 
4662     if (icnt1 == -1 || icnt1 == 1) {
4663       Label CH1_LOOP, HAS_ZERO;
4664       Label DO1_SHORT, DO1_LOOP;
4665 
4666       BIND(DO1);
4667         (this->*str1_load_1chr)(ch1, str1);
4668         cmp(cnt2, 8);
4669         br(LT, DO1_SHORT);
4670 





4671         if (str2_isL) {
4672           if (!str1_isL) {
4673             tst(ch1, 0xff00);
4674             br(NE, NOMATCH);
4675           }
4676           orr(ch1, ch1, ch1, LSL, 8);
4677         }
4678         orr(ch1, ch1, ch1, LSL, 16);
4679         orr(ch1, ch1, ch1, LSL, 32);
4680 
4681         sub(cnt2, cnt2, 8/str2_chr_size);
4682         mov(result_tmp, cnt2);
4683         lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
4684         sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
4685 
4686         mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
4687       BIND(CH1_LOOP);
4688         ldr(ch2, Address(str2, cnt2_neg));
4689         eor(ch2, ch1, ch2);
4690         sub(tmp1, ch2, tmp3);
4691         orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4692         bics(tmp1, tmp1, tmp2);
4693         br(NE, HAS_ZERO);
4694         adds(cnt2_neg, cnt2_neg, 8);
4695         br(LT, CH1_LOOP);
4696 
4697         cmp(cnt2_neg, 8);
4698         mov(cnt2_neg, 0);
4699         br(LT, CH1_LOOP);
4700         b(NOMATCH);
4701 
4702       BIND(HAS_ZERO);
4703         rev(tmp1, tmp1);
4704         clz(tmp1, tmp1);
4705         add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
4706         b(MATCH);




4357     ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4358     add(sp, sp, framesize);
4359   } else {
4360     if (framesize < ((1 << 12) + 2 * wordSize))
4361       add(sp, sp, framesize - 2 * wordSize);
4362     else {
4363       mov(rscratch1, framesize - 2 * wordSize);
4364       add(sp, sp, rscratch1);
4365     }
4366     ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
4367   }
4368 }
4369 
4370 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
4371 
4372 // Search for str1 in str2 and return index or -1
4373 void MacroAssembler::string_indexof(Register str2, Register str1,
4374                                     Register cnt2, Register cnt1,
4375                                     Register tmp1, Register tmp2,
4376                                     Register tmp3, Register tmp4,
4377                                     Register tmp5, Register tmp6,
4378                                     int icnt1, Register result, int ae) {
4379   // NOTE: tmp5, tmp6 can be zr depending on specific method version
4380   Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
4381 
4382   Register ch1 = rscratch1;
4383   Register ch2 = rscratch2;
4384   Register cnt1tmp = tmp1;
4385   Register cnt2tmp = tmp2;
4386   Register cnt1_neg = cnt1;
4387   Register cnt2_neg = cnt2;
4388   Register result_tmp = tmp4;
4389 
4390   bool isL = ae == StrIntrinsicNode::LL;
4391 
4392   bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
4393   bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
4394   int str1_chr_shift = str1_isL ? 0:1;
4395   int str2_chr_shift = str2_isL ? 0:1;
4396   int str1_chr_size = str1_isL ? 1:2;
4397   int str2_chr_size = str2_isL ? 1:2;
4398   chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
4399                                       (chr_insn)&MacroAssembler::ldrh;
4400   chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
4401                                       (chr_insn)&MacroAssembler::ldrh;
4402   chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
4403   chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
4404 
4405   // Note, inline_string_indexOf() generates checks:
4406   // if (substr.count > string.count) return -1;
4407   // if (substr.count == 0) return 0;
4408 
4409   // We have two strings, a source string in str2, cnt2 and a pattern string
4410   // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
4411 
4412   // For larger pattern and source we use a simplified Boyer Moore algorithm.
4413   // With a small pattern and source we use linear scan.
4414 
4415   if (icnt1 == -1) {
4416     sub(result_tmp, cnt2, cnt1);
4417     cmp(cnt1, 8);             // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
4418     br(LT, LINEARSEARCH);
4419     dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
4420     cmp(cnt1, 256);
4421     lsr(tmp1, cnt2, 2);
4422     ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
4423     br(GE, LINEARSTUB);
4424   }
4425 
4426 // The Boyer Moore alogorithm is based on the description here:-
4427 //
4428 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
4429 //
4430 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule
4431 // and the 'Good Suffix' rule.
4432 //
4433 // These rules are essentially heuristics for how far we can shift the
4434 // pattern along the search string.
4435 //
4436 // The implementation here uses the 'Bad Character' rule only because of the
4437 // complexity of initialisation for the 'Good Suffix' rule.
4438 //
4439 // This is also known as the Boyer-Moore-Horspool algorithm:-
4440 //
4441 // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
4442 //
4443 // This particular implementation has few java-specific optimizations.
4444 //
4445 // #define ASIZE 256
4446 //
4447 //    int bm(unsigned char *x, int m, unsigned char *y, int n) {
4448 //       int i, j;
4449 //       unsigned c;
4450 //       unsigned char bc[ASIZE];
4451 //
4452 //       /* Preprocessing */
4453 //       for (i = 0; i < ASIZE; ++i)
4454 //          bc[i] = m;
4455 //       for (i = 0; i < m - 1; ) {
4456 //          c = x[i];
4457 //          ++i;
4458 //          // c < 256 for Latin1 string, so, no need for branch
4459 //          #ifdef PATTERN_STRING_IS_LATIN1
4460 //          bc[c] = m - i;
4461 //          #else
4462 //          if (c < ASIZE) bc[c] = m - i;
4463 //          #endif
4464 //       }
4465 //
4466 //       /* Searching */
4467 //       j = 0;
4468 //       while (j <= n - m) {
4469 //          c = y[i+j];
4470 //          if (x[m-1] == c)
4471 //            for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
4472 //          if (i < 0) return j;
4473 //          // c < 256 for Latin1 string, so, no need for branch
4474 //          #ifdef SOURCE_STRING_IS_LATIN1
4475 //          // LL case: (c< 256) always true. Remove branch
4476 //          j += bc[y[j+m-1]];
4477 //          #endif
4478 //          #ifndef PATTERN_STRING_IS_UTF
4479 //          // UU case: need if (c<ASIZE) check. Skip 1 character if not.
4480 //          if (c < ASIZE)
4481 //            j += bc[y[j+m-1]];
4482 //          else
4483 //            j += 1
4484 //          #endif
4485 //          #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
4486 //          // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
4487 //          if (c < ASIZE)
4488 //            j += bc[y[j+m-1]];
4489 //          else
4490 //            j += m
4491 //          #endif
4492 //       }
4493 //    }
4494 
4495   if (icnt1 == -1) {
4496     Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
4497         BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;



4498     Register cnt1end = tmp2;
4499     Register str2end = cnt2;
4500     Register skipch = tmp2;
4501 
4502     // str1 length is >=8, so, we can read at least 1 register for cases when
4503     // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
4504     // UL case. We'll re-read last character in inner pre-loop code to have
4505     // single outer pre-loop load
4506     const int firstStep = isL ? 7 : 3;
4507 
4508     const int ASIZE = 256;
4509     const int STORED_BYTES = 32; // amount of bytes stored per instruction
4510     sub(sp, sp, ASIZE);
4511     mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
4512     mov(ch1, sp);
4513     BIND(BM_INIT_LOOP);
4514       stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
4515       subs(tmp5, tmp5, 1);
4516       br(GT, BM_INIT_LOOP);
4517 
4518       sub(cnt1tmp, cnt1, 1);
4519       mov(tmp5, str2);
4520       add(str2end, str2, result_tmp, LSL, str2_chr_shift);
4521       sub(ch2, cnt1, 1);
4522       mov(tmp3, str1);
4523     BIND(BCLOOP);
4524       (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
4525       if (!str1_isL) {
4526         cmp(ch1, ASIZE);
4527         br(HS, BCSKIP);
4528       }
4529       strb(ch2, Address(sp, ch1));
4530     BIND(BCSKIP);
4531       subs(ch2, ch2, 1);
4532       br(GT, BCLOOP);


4533 
4534       add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
4535       if (str1_isL == str2_isL) {
4536         // load last 8 bytes (8LL/4UU symbols)
4537         ldr(tmp6, Address(tmp6, -wordSize));
4538       } else {
4539         ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
4540         // convert Latin1 to UTF. We'll have to wait until load completed, but
4541         // it's still faster than per-character loads+checks
4542         lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
4543         ubfx(ch1, tmp6, 8, 8); // str1[N-2]
4544         ubfx(ch2, tmp6, 16, 8); // str1[N-3]
4545         andr(tmp6, tmp6, 0xFF); // str1[N-4]
4546         orr(ch2, ch1, ch2, LSL, 16);
4547         orr(tmp6, tmp6, tmp3, LSL, 48);
4548         orr(tmp6, tmp6, ch2, LSL, 16);
4549       }
4550     BIND(BMLOOPSTR2);


4551       (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
4552       sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
4553       if (str1_isL == str2_isL) {
4554         // re-init tmp3. It's for free because it's executed in parallel with
4555         // load above. Alternative is to initialize it before loop, but it'll
4556         // affect performance on in-order systems with 2 or more ld/st pipelines
4557         lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
4558       }
4559       if (!isL) { // UU/UL case
4560         lsl(ch2, cnt1tmp, 1); // offset in bytes
4561       }
4562       cmp(tmp3, skipch);
4563       br(NE, BMSKIP);
4564       ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
4565       mov(ch1, tmp6);
4566       if (isL) {
4567         b(BMLOOPSTR1_AFTER_LOAD);
4568       } else {
4569         sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
4570         b(BMLOOPSTR1_CMP);
4571       }
4572     BIND(BMLOOPSTR1);
4573       (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
4574       (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
4575     BIND(BMLOOPSTR1_AFTER_LOAD);

4576       subs(cnt1tmp, cnt1tmp, 1);
4577       br(LT, BMLOOPSTR1_LASTCMP);
4578     BIND(BMLOOPSTR1_CMP);
4579       cmp(ch1, ch2);
4580       br(EQ, BMLOOPSTR1);





4581     BIND(BMSKIP);
4582       if (!isL) {
4583         // if we've met UTF symbol while searching Latin1 pattern, then we can
4584         // skip cnt1 symbols
4585         if (str1_isL != str2_isL) {
4586           mov(result_tmp, cnt1);
4587         } else {
4588           mov(result_tmp, 1);
4589         }
4590         cmp(skipch, ASIZE);
4591         br(HS, BMADV);
4592       }
4593       ldrb(result_tmp, Address(sp, skipch)); // load skip distance
4594     BIND(BMADV);
4595       sub(cnt1tmp, cnt1, 1);
4596       add(str2, str2, result_tmp, LSL, str2_chr_shift);
4597       cmp(str2, str2end);
4598       br(LE, BMLOOPSTR2);
4599       add(sp, sp, ASIZE);
4600       b(NOMATCH);
4601     BIND(BMLOOPSTR1_LASTCMP);
4602       cmp(ch1, ch2);
4603       br(NE, BMSKIP);
4604     BIND(BMMATCH);
4605       sub(result, str2, tmp5);
4606       if (!str2_isL) lsr(result, result, 1);
4607       add(sp, sp, ASIZE);
4608       b(DONE);
4609 
4610     BIND(LINEARSTUB);
4611     cmp(cnt1, 16); // small patterns still should be handled by simple algorithm
4612     br(LT, LINEAR_MEDIUM);
4613     mov(result, zr);
4614     RuntimeAddress stub = NULL;
4615     if (isL) {
4616       stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
4617       assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
4618     } else if (str1_isL) {
4619       stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
4620        assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
4621     } else {
4622       stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
4623       assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
4624     }
4625     trampoline_call(stub);
4626     b(DONE);
4627   }
4628 
4629   BIND(LINEARSEARCH);
4630   {
4631     Label DO1, DO2, DO3;
4632 
4633     Register str2tmp = tmp2;
4634     Register first = tmp3;
4635 
4636     if (icnt1 == -1)
4637     {
4638         Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
4639 
4640         cmp(cnt1, str1_isL == str2_isL ? 4 : 2);
4641         br(LT, DOSHORT);
4642       BIND(LINEAR_MEDIUM);
4643         (this->*str1_load_1chr)(first, Address(str1));


4644         lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));

4645         sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
4646         lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
4647         sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
4648 
4649       BIND(FIRST_LOOP);
4650         (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
4651         cmp(first, ch2);
4652         br(EQ, STR1_LOOP);
4653       BIND(STR2_NEXT);
4654         adds(cnt2_neg, cnt2_neg, str2_chr_size);
4655         br(LE, FIRST_LOOP);
4656         b(NOMATCH);
4657 
4658       BIND(STR1_LOOP);
4659         adds(cnt1tmp, cnt1_neg, str1_chr_size);
4660         add(cnt2tmp, cnt2_neg, str2_chr_size);
4661         br(GE, MATCH);
4662 
4663       BIND(STR1_NEXT);
4664         (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
4665         (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
4666         cmp(ch1, ch2);
4667         br(NE, STR2_NEXT);
4668         adds(cnt1tmp, cnt1tmp, str1_chr_size);
4669         add(cnt2tmp, cnt2tmp, str2_chr_size);
4670         br(LT, STR1_NEXT);
4671         b(MATCH);
4672 
4673       BIND(DOSHORT);
4674       if (str1_isL == str2_isL) {
4675         cmp(cnt1, 2);
4676         br(LT, DO1);
4677         br(GT, DO3);
4678       }
4679     }
4680 
4681     if (icnt1 == 4) {
4682       Label CH1_LOOP;
4683 
4684         (this->*load_4chr)(ch1, str1);
4685         sub(result_tmp, cnt2, 4);
4686         lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
4687         sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);

4688 
4689       BIND(CH1_LOOP);
4690         (this->*load_4chr)(ch2, Address(str2, cnt2_neg));
4691         cmp(ch1, ch2);
4692         br(EQ, MATCH);
4693         adds(cnt2_neg, cnt2_neg, str2_chr_size);
4694         br(LE, CH1_LOOP);
4695         b(NOMATCH);
4696       }
4697 
4698     if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
4699       Label CH1_LOOP;
4700 
4701       BIND(DO2);
4702         (this->*load_2chr)(ch1, str1);
4703         if (icnt1 == 2) {
4704           sub(result_tmp, cnt2, 2);
4705         }
4706         lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
4707         sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
4708       BIND(CH1_LOOP);
4709         (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
4710         cmp(ch1, ch2);
4711         br(EQ, MATCH);
4712         adds(cnt2_neg, cnt2_neg, str2_chr_size);
4713         br(LE, CH1_LOOP);
4714         b(NOMATCH);
4715     }
4716 
4717     if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
4718       Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
4719 
4720       BIND(DO3);
4721         (this->*load_2chr)(first, str1);
4722         (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
4723         if (icnt1 == 3) {
4724           sub(result_tmp, cnt2, 3);
4725         }
4726         lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
4727         sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);

4728       BIND(FIRST_LOOP);
4729         (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
4730         cmpw(first, ch2);
4731         br(EQ, STR1_LOOP);
4732       BIND(STR2_NEXT);
4733         adds(cnt2_neg, cnt2_neg, str2_chr_size);
4734         br(LE, FIRST_LOOP);
4735         b(NOMATCH);
4736 
4737       BIND(STR1_LOOP);
4738         add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
4739         (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
4740         cmp(ch1, ch2);
4741         br(NE, STR2_NEXT);
4742         b(MATCH);
4743     }
4744 
4745     if (icnt1 == -1 || icnt1 == 1) {
4746       Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;

4747 
4748       BIND(DO1);
4749         (this->*str1_load_1chr)(ch1, str1);
4750         cmp(cnt2, 8);
4751         br(LT, DO1_SHORT);
4752 
4753         sub(result_tmp, cnt2, 8/str2_chr_size);
4754         sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
4755         mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
4756         lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
4757 
4758         if (str2_isL) {




4759           orr(ch1, ch1, ch1, LSL, 8);
4760         }
4761         orr(ch1, ch1, ch1, LSL, 16);
4762         orr(ch1, ch1, ch1, LSL, 32);







4763       BIND(CH1_LOOP);
4764         ldr(ch2, Address(str2, cnt2_neg));
4765         eor(ch2, ch1, ch2);
4766         sub(tmp1, ch2, tmp3);
4767         orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4768         bics(tmp1, tmp1, tmp2);
4769         br(NE, HAS_ZERO);
4770         adds(cnt2_neg, cnt2_neg, 8);
4771         br(LT, CH1_LOOP);
4772 
4773         cmp(cnt2_neg, 8);
4774         mov(cnt2_neg, 0);
4775         br(LT, CH1_LOOP);
4776         b(NOMATCH);
4777 
4778       BIND(HAS_ZERO);
4779         rev(tmp1, tmp1);
4780         clz(tmp1, tmp1);
4781         add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
4782         b(MATCH);


< prev index next >