< prev index next >
src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Print this page
*** 4372,4383 ****
// Search for str1 in str2 and return index or -1
void MacroAssembler::string_indexof(Register str2, Register str1,
Register cnt2, Register cnt1,
Register tmp1, Register tmp2,
Register tmp3, Register tmp4,
int icnt1, Register result, int ae) {
! Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH;
Register ch1 = rscratch1;
Register ch2 = rscratch2;
Register cnt1tmp = tmp1;
Register cnt2tmp = tmp2;
--- 4372,4385 ----
// Search for str1 in str2 and return index or -1
void MacroAssembler::string_indexof(Register str2, Register str1,
Register cnt2, Register cnt1,
Register tmp1, Register tmp2,
Register tmp3, Register tmp4,
+ Register tmp5, Register tmp6,
int icnt1, Register result, int ae) {
! // NOTE: tmp5, tmp6 can be zr depending on specific method version
! Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
Register ch1 = rscratch1;
Register ch2 = rscratch2;
Register cnt1tmp = tmp1;
Register cnt2tmp = tmp2;
*** 4402,4423 ****
// Note, inline_string_indexOf() generates checks:
// if (substr.count > string.count) return -1;
// if (substr.count == 0) return 0;
! // We have two strings, a source string in str2, cnt2 and a pattern string
! // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
! // For larger pattern and source we use a simplified Boyer Moore algorithm.
! // With a small pattern and source we use linear scan.
if (icnt1 == -1) {
! cmp(cnt1, 256); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
! ccmp(cnt1, 8, 0b0000, LO); // Can't handle skip >= 256 because we use
! br(LO, LINEARSEARCH); // a byte array.
! cmp(cnt1, cnt2, LSR, 2); // Source must be 4 * pattern for BM
! br(HS, LINEARSEARCH);
}
// The Boyer Moore alogorithm is based on the description here:-
//
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
--- 4404,4428 ----
// Note, inline_string_indexOf() generates checks:
// if (substr.count > string.count) return -1;
// if (substr.count == 0) return 0;
! // We have two strings, a source string in str2, cnt2 and a pattern string
! // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
! // For larger pattern and source we use a simplified Boyer Moore algorithm.
! // With a small pattern and source we use linear scan.
if (icnt1 == -1) {
! sub(result_tmp, cnt2, cnt1);
! cmp(cnt1, 8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
! br(LT, LINEARSEARCH);
! dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
! cmp(cnt1, 256);
! lsr(tmp1, cnt2, 2);
! ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
! br(GE, LINEARSTUB);
}
// The Boyer Moore alogorithm is based on the description here:-
//
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
*** 4433,4543 ****
//
// This is also known as the Boyer-Moore-Horspool algorithm:-
//
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
//
! // #define ASIZE 128
//
// int bm(unsigned char *x, int m, unsigned char *y, int n) {
// int i, j;
// unsigned c;
// unsigned char bc[ASIZE];
//
// /* Preprocessing */
// for (i = 0; i < ASIZE; ++i)
! // bc[i] = 0;
// for (i = 0; i < m - 1; ) {
// c = x[i];
// ++i;
! // if (c < ASIZE) bc[c] = i;
// }
//
// /* Searching */
// j = 0;
// while (j <= n - m) {
// c = y[i+j];
// if (x[m-1] == c)
// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
// if (i < 0) return j;
// if (c < ASIZE)
! // j = j - bc[y[j+m-1]] + m;
// else
! // j += 1; // Advance by 1 only if char >= ASIZE
// }
// }
if (icnt1 == -1) {
! BIND(BM);
!
! Label ZLOOP, BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP;
! Label BMADV, BMMATCH, BMCHECKEND;
!
Register cnt1end = tmp2;
Register str2end = cnt2;
Register skipch = tmp2;
! // Restrict ASIZE to 128 to reduce stack space/initialisation.
! // The presence of chars >= ASIZE in the target string does not affect
! // performance, but we must be careful not to initialise them in the stack
! // array.
! // The presence of chars >= ASIZE in the source string may adversely affect
! // performance since we can only advance by one when we encounter one.
!
! stp(zr, zr, pre(sp, -128));
! for (int i = 1; i < 8; i++)
! stp(zr, zr, Address(sp, i*16));
! mov(cnt1tmp, 0);
! sub(cnt1end, cnt1, 1);
BIND(BCLOOP);
! (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
! cmp(ch1, 128);
! add(cnt1tmp, cnt1tmp, 1);
br(HS, BCSKIP);
! strb(cnt1tmp, Address(sp, ch1));
BIND(BCSKIP);
! cmp(cnt1tmp, cnt1end);
! br(LT, BCLOOP);
!
! mov(result_tmp, str2);
! sub(cnt2, cnt2, cnt1);
! add(str2end, str2, cnt2, LSL, str2_chr_shift);
BIND(BMLOOPSTR2);
- sub(cnt1tmp, cnt1, 1);
- (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
! cmp(ch1, skipch);
br(NE, BMSKIP);
! subs(cnt1tmp, cnt1tmp, 1);
! br(LT, BMMATCH);
BIND(BMLOOPSTR1);
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
! cmp(ch1, ch2);
! br(NE, BMSKIP);
subs(cnt1tmp, cnt1tmp, 1);
! br(GE, BMLOOPSTR1);
! BIND(BMMATCH);
! sub(result, str2, result_tmp);
! if (!str2_isL) lsr(result, result, 1);
! add(sp, sp, 128);
! b(DONE);
! BIND(BMADV);
! add(str2, str2, str2_chr_size);
! b(BMCHECKEND);
BIND(BMSKIP);
! cmp(skipch, 128);
br(HS, BMADV);
! ldrb(ch2, Address(sp, skipch));
! add(str2, str2, cnt1, LSL, str2_chr_shift);
! sub(str2, str2, ch2, LSL, str2_chr_shift);
! BIND(BMCHECKEND);
cmp(str2, str2end);
br(LE, BMLOOPSTR2);
! add(sp, sp, 128);
b(NOMATCH);
}
BIND(LINEARSEARCH);
{
Label DO1, DO2, DO3;
--- 4438,4631 ----
//
// This is also known as the Boyer-Moore-Horspool algorithm:-
//
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
//
! // This particular implementation has few java-specific optimizations.
! //
! // #define ASIZE 256
//
// int bm(unsigned char *x, int m, unsigned char *y, int n) {
// int i, j;
// unsigned c;
// unsigned char bc[ASIZE];
//
// /* Preprocessing */
// for (i = 0; i < ASIZE; ++i)
! // bc[i] = m;
// for (i = 0; i < m - 1; ) {
// c = x[i];
// ++i;
! // // c < 256 for Latin1 string, so, no need for branch
! // #ifdef PATTERN_STRING_IS_LATIN1
! // bc[c] = m - i;
! // #else
! // if (c < ASIZE) bc[c] = m - i;
! // #endif
// }
//
// /* Searching */
// j = 0;
// while (j <= n - m) {
// c = y[i+j];
// if (x[m-1] == c)
// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
// if (i < 0) return j;
+ // // c < 256 for Latin1 string, so, no need for branch
+ // #ifdef SOURCE_STRING_IS_LATIN1
+ // // LL case: (c< 256) always true. Remove branch
+ // j += bc[y[j+m-1]];
+ // #endif
+ // #ifndef PATTERN_STRING_IS_UTF
+ // // UU case: need if (c<ASIZE) check. Skip 1 character if not.
+ // if (c < ASIZE)
+ // j += bc[y[j+m-1]];
+ // else
+ // j += 1
+ // #endif
+ // #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
+ // // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
// if (c < ASIZE)
! // j += bc[y[j+m-1]];
// else
! // j += m
! // #endif
// }
// }
if (icnt1 == -1) {
! Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
! BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
Register cnt1end = tmp2;
Register str2end = cnt2;
Register skipch = tmp2;
! // str1 length is >=8, so, we can read at least 1 register for cases when
! // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
! // UL case. We'll re-read last character in inner pre-loop code to have
! // single outer pre-loop load
! const int firstStep = isL ? 7 : 3;
!
! const int ASIZE = 256;
! const int STORED_BYTES = 32; // amount of bytes stored per instruction
! sub(sp, sp, ASIZE);
! mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
! mov(ch1, sp);
! BIND(BM_INIT_LOOP);
! stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
! subs(tmp5, tmp5, 1);
! br(GT, BM_INIT_LOOP);
! sub(cnt1tmp, cnt1, 1);
! mov(tmp5, str2);
! add(str2end, str2, result_tmp, LSL, str2_chr_shift);
! sub(ch2, cnt1, 1);
! mov(tmp3, str1);
BIND(BCLOOP);
! (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
! if (!str1_isL) {
! cmp(ch1, ASIZE);
br(HS, BCSKIP);
! }
! strb(ch2, Address(sp, ch1));
BIND(BCSKIP);
! subs(ch2, ch2, 1);
! br(GT, BCLOOP);
! add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
! if (str1_isL == str2_isL) {
! // load last 8 bytes (8LL/4UU symbols)
! ldr(tmp6, Address(tmp6, -wordSize));
! } else {
! ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
! // convert Latin1 to UTF. We'll have to wait until load completed, but
! // it's still faster than per-character loads+checks
! lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
! ubfx(ch1, tmp6, 8, 8); // str1[N-2]
! ubfx(ch2, tmp6, 16, 8); // str1[N-3]
! andr(tmp6, tmp6, 0xFF); // str1[N-4]
! orr(ch2, ch1, ch2, LSL, 16);
! orr(tmp6, tmp6, tmp3, LSL, 48);
! orr(tmp6, tmp6, ch2, LSL, 16);
! }
BIND(BMLOOPSTR2);
(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
! sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
! if (str1_isL == str2_isL) {
! // re-init tmp3. It's for free because it's executed in parallel with
! // load above. Alternative is to initialize it before loop, but it'll
! // affect performance on in-order systems with 2 or more ld/st pipelines
! lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
! }
! if (!isL) { // UU/UL case
! lsl(ch2, cnt1tmp, 1); // offset in bytes
! }
! cmp(tmp3, skipch);
br(NE, BMSKIP);
! ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
! mov(ch1, tmp6);
! if (isL) {
! b(BMLOOPSTR1_AFTER_LOAD);
! } else {
! sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
! b(BMLOOPSTR1_CMP);
! }
BIND(BMLOOPSTR1);
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
! BIND(BMLOOPSTR1_AFTER_LOAD);
subs(cnt1tmp, cnt1tmp, 1);
! br(LT, BMLOOPSTR1_LASTCMP);
! BIND(BMLOOPSTR1_CMP);
! cmp(ch1, ch2);
! br(EQ, BMLOOPSTR1);
BIND(BMSKIP);
! if (!isL) {
! // if we've met UTF symbol while searching Latin1 pattern, then we can
! // skip cnt1 symbols
! if (str1_isL != str2_isL) {
! mov(result_tmp, cnt1);
! } else {
! mov(result_tmp, 1);
! }
! cmp(skipch, ASIZE);
br(HS, BMADV);
! }
! ldrb(result_tmp, Address(sp, skipch)); // load skip distance
! BIND(BMADV);
! sub(cnt1tmp, cnt1, 1);
! add(str2, str2, result_tmp, LSL, str2_chr_shift);
cmp(str2, str2end);
br(LE, BMLOOPSTR2);
! add(sp, sp, ASIZE);
b(NOMATCH);
+ BIND(BMLOOPSTR1_LASTCMP);
+ cmp(ch1, ch2);
+ br(NE, BMSKIP);
+ BIND(BMMATCH);
+ sub(result, str2, tmp5);
+ if (!str2_isL) lsr(result, result, 1);
+ add(sp, sp, ASIZE);
+ b(DONE);
+
+ BIND(LINEARSTUB);
+ cmp(cnt1, 16); // small patterns still should be handled by simple algorithm
+ br(LT, LINEAR_MEDIUM);
+ mov(result, zr);
+ RuntimeAddress stub = NULL;
+ if (isL) {
+ stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
+ } else if (str1_isL) {
+ stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
+ } else {
+ stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
+ }
+ trampoline_call(stub);
+ b(DONE);
}
BIND(LINEARSEARCH);
{
Label DO1, DO2, DO3;
*** 4549,4567 ****
{
Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
cmp(cnt1, str1_isL == str2_isL ? 4 : 2);
br(LT, DOSHORT);
!
! sub(cnt2, cnt2, cnt1);
! mov(result_tmp, cnt2);
!
lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
- lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
! sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
! (this->*str1_load_1chr)(first, Address(str1, cnt1_neg));
BIND(FIRST_LOOP);
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
cmp(first, ch2);
br(EQ, STR1_LOOP);
--- 4637,4652 ----
{
Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
cmp(cnt1, str1_isL == str2_isL ? 4 : 2);
br(LT, DOSHORT);
! BIND(LINEAR_MEDIUM);
! (this->*str1_load_1chr)(first, Address(str1));
lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
! lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
! sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(FIRST_LOOP);
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
cmp(first, ch2);
br(EQ, STR1_LOOP);
*** 4595,4608 ****
if (icnt1 == 4) {
Label CH1_LOOP;
(this->*load_4chr)(ch1, str1);
! sub(cnt2, cnt2, 4);
! mov(result_tmp, cnt2);
! lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
! sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
BIND(CH1_LOOP);
(this->*load_4chr)(ch2, Address(str2, cnt2_neg));
cmp(ch1, ch2);
br(EQ, MATCH);
--- 4680,4692 ----
if (icnt1 == 4) {
Label CH1_LOOP;
(this->*load_4chr)(ch1, str1);
! sub(result_tmp, cnt2, 4);
! lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
! sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(CH1_LOOP);
(this->*load_4chr)(ch2, Address(str2, cnt2_neg));
cmp(ch1, ch2);
br(EQ, MATCH);
*** 4614,4628 ****
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
Label CH1_LOOP;
BIND(DO2);
(this->*load_2chr)(ch1, str1);
! sub(cnt2, cnt2, 2);
! mov(result_tmp, cnt2);
! lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
! sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
!
BIND(CH1_LOOP);
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
cmp(ch1, ch2);
br(EQ, MATCH);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
--- 4698,4712 ----
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
Label CH1_LOOP;
BIND(DO2);
(this->*load_2chr)(ch1, str1);
! if (icnt1 == 2) {
! sub(result_tmp, cnt2, 2);
! }
! lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
! sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(CH1_LOOP);
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
cmp(ch1, ch2);
br(EQ, MATCH);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
*** 4634,4649 ****
Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
BIND(DO3);
(this->*load_2chr)(first, str1);
(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
!
! sub(cnt2, cnt2, 3);
! mov(result_tmp, cnt2);
! lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
! sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
!
BIND(FIRST_LOOP);
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
cmpw(first, ch2);
br(EQ, STR1_LOOP);
BIND(STR2_NEXT);
--- 4718,4732 ----
Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
BIND(DO3);
(this->*load_2chr)(first, str1);
(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
! if (icnt1 == 3) {
! sub(result_tmp, cnt2, 3);
! }
! lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
! sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(FIRST_LOOP);
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
cmpw(first, ch2);
br(EQ, STR1_LOOP);
BIND(STR2_NEXT);
*** 4658,4691 ****
br(NE, STR2_NEXT);
b(MATCH);
}
if (icnt1 == -1 || icnt1 == 1) {
! Label CH1_LOOP, HAS_ZERO;
! Label DO1_SHORT, DO1_LOOP;
BIND(DO1);
(this->*str1_load_1chr)(ch1, str1);
cmp(cnt2, 8);
br(LT, DO1_SHORT);
if (str2_isL) {
- if (!str1_isL) {
- tst(ch1, 0xff00);
- br(NE, NOMATCH);
- }
orr(ch1, ch1, ch1, LSL, 8);
}
orr(ch1, ch1, ch1, LSL, 16);
orr(ch1, ch1, ch1, LSL, 32);
-
- sub(cnt2, cnt2, 8/str2_chr_size);
- mov(result_tmp, cnt2);
- lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
- sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
-
- mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
BIND(CH1_LOOP);
ldr(ch2, Address(str2, cnt2_neg));
eor(ch2, ch1, ch2);
sub(tmp1, ch2, tmp3);
orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
--- 4741,4767 ----
br(NE, STR2_NEXT);
b(MATCH);
}
if (icnt1 == -1 || icnt1 == 1) {
! Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
BIND(DO1);
(this->*str1_load_1chr)(ch1, str1);
cmp(cnt2, 8);
br(LT, DO1_SHORT);
+ sub(result_tmp, cnt2, 8/str2_chr_size);
+ sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
+ mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
+ lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
+
if (str2_isL) {
orr(ch1, ch1, ch1, LSL, 8);
}
orr(ch1, ch1, ch1, LSL, 16);
orr(ch1, ch1, ch1, LSL, 32);
BIND(CH1_LOOP);
ldr(ch2, Address(str2, cnt2_neg));
eor(ch2, ch1, ch2);
sub(tmp1, ch2, tmp3);
orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
< prev index next >