1 /* 2 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "opto/c2_MacroAssembler.hpp" 29 #include "opto/intrinsicnode.hpp" 30 31 #ifdef PRODUCT 32 #define BLOCK_COMMENT(str) /* nothing */ 33 #define STOP(error) stop(error) 34 #else 35 #define BLOCK_COMMENT(str) block_comment(str) 36 #define STOP(error) block_comment(error); stop(error) 37 #endif 38 39 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 40 41 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); 42 43 // Search for str1 in str2 and return index or -1 44 void C2_MacroAssembler::string_indexof(Register str2, Register str1, 45 Register cnt2, Register cnt1, 46 Register tmp1, Register tmp2, 47 Register tmp3, Register tmp4, 48 Register tmp5, Register tmp6, 49 int icnt1, Register result, int ae) { 50 // NOTE: tmp5, tmp6 can be zr depending on specific method version 51 Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH; 52 53 Register ch1 = rscratch1; 54 Register ch2 = rscratch2; 55 Register cnt1tmp = tmp1; 56 Register cnt2tmp = tmp2; 57 Register cnt1_neg = cnt1; 58 Register cnt2_neg = cnt2; 59 Register result_tmp = tmp4; 60 61 bool isL = ae == StrIntrinsicNode::LL; 62 63 bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; 64 bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; 65 int str1_chr_shift = str1_isL ? 0:1; 66 int str2_chr_shift = str2_isL ? 0:1; 67 int str1_chr_size = str1_isL ? 1:2; 68 int str2_chr_size = str2_isL ? 1:2; 69 chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb : 70 (chr_insn)&MacroAssembler::ldrh; 71 chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb : 72 (chr_insn)&MacroAssembler::ldrh; 73 chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw; 74 chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr; 75 76 // Note, inline_string_indexOf() generates checks: 77 // if (substr.count > string.count) return -1; 78 // if (substr.count == 0) return 0; 79 80 // We have two strings, a source string in str2, cnt2 and a pattern string 81 // in str1, cnt1. Find the 1st occurence of pattern in source or return -1. 82 83 // For larger pattern and source we use a simplified Boyer Moore algorithm. 84 // With a small pattern and source we use linear scan. 85 86 if (icnt1 == -1) { 87 sub(result_tmp, cnt2, cnt1); 88 cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256 89 br(LT, LINEARSEARCH); 90 dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty 91 subs(zr, cnt1, 256); 92 lsr(tmp1, cnt2, 2); 93 ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM 94 br(GE, LINEARSTUB); 95 } 96 97 // The Boyer Moore alogorithm is based on the description here:- 98 // 99 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm 100 // 101 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule 102 // and the 'Good Suffix' rule. 103 // 104 // These rules are essentially heuristics for how far we can shift the 105 // pattern along the search string. 106 // 107 // The implementation here uses the 'Bad Character' rule only because of the 108 // complexity of initialisation for the 'Good Suffix' rule. 109 // 110 // This is also known as the Boyer-Moore-Horspool algorithm:- 111 // 112 // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm 113 // 114 // This particular implementation has few java-specific optimizations. 115 // 116 // #define ASIZE 256 117 // 118 // int bm(unsigned char *x, int m, unsigned char *y, int n) { 119 // int i, j; 120 // unsigned c; 121 // unsigned char bc[ASIZE]; 122 // 123 // /* Preprocessing */ 124 // for (i = 0; i < ASIZE; ++i) 125 // bc[i] = m; 126 // for (i = 0; i < m - 1; ) { 127 // c = x[i]; 128 // ++i; 129 // // c < 256 for Latin1 string, so, no need for branch 130 // #ifdef PATTERN_STRING_IS_LATIN1 131 // bc[c] = m - i; 132 // #else 133 // if (c < ASIZE) bc[c] = m - i; 134 // #endif 135 // } 136 // 137 // /* Searching */ 138 // j = 0; 139 // while (j <= n - m) { 140 // c = y[i+j]; 141 // if (x[m-1] == c) 142 // for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i); 143 // if (i < 0) return j; 144 // // c < 256 for Latin1 string, so, no need for branch 145 // #ifdef SOURCE_STRING_IS_LATIN1 146 // // LL case: (c< 256) always true. Remove branch 147 // j += bc[y[j+m-1]]; 148 // #endif 149 // #ifndef PATTERN_STRING_IS_UTF 150 // // UU case: need if (c<ASIZE) check. Skip 1 character if not. 151 // if (c < ASIZE) 152 // j += bc[y[j+m-1]]; 153 // else 154 // j += 1 155 // #endif 156 // #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF 157 // // UL case: need if (c<ASIZE) check. Skip <pattern length> if not. 158 // if (c < ASIZE) 159 // j += bc[y[j+m-1]]; 160 // else 161 // j += m 162 // #endif 163 // } 164 // } 165 166 if (icnt1 == -1) { 167 Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, 168 BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; 169 Register cnt1end = tmp2; 170 Register str2end = cnt2; 171 Register skipch = tmp2; 172 173 // str1 length is >=8, so, we can read at least 1 register for cases when 174 // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for 175 // UL case. We'll re-read last character in inner pre-loop code to have 176 // single outer pre-loop load 177 const int firstStep = isL ? 7 : 3; 178 179 const int ASIZE = 256; 180 const int STORED_BYTES = 32; // amount of bytes stored per instruction 181 sub(sp, sp, ASIZE); 182 mov(tmp5, ASIZE/STORED_BYTES); // loop iterations 183 mov(ch1, sp); 184 BIND(BM_INIT_LOOP); 185 stpq(v0, v0, Address(post(ch1, STORED_BYTES))); 186 subs(tmp5, tmp5, 1); 187 br(GT, BM_INIT_LOOP); 188 189 sub(cnt1tmp, cnt1, 1); 190 mov(tmp5, str2); 191 add(str2end, str2, result_tmp, LSL, str2_chr_shift); 192 sub(ch2, cnt1, 1); 193 mov(tmp3, str1); 194 BIND(BCLOOP); 195 (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size))); 196 if (!str1_isL) { 197 subs(zr, ch1, ASIZE); 198 br(HS, BCSKIP); 199 } 200 strb(ch2, Address(sp, ch1)); 201 BIND(BCSKIP); 202 subs(ch2, ch2, 1); 203 br(GT, BCLOOP); 204 205 add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1 206 if (str1_isL == str2_isL) { 207 // load last 8 bytes (8LL/4UU symbols) 208 ldr(tmp6, Address(tmp6, -wordSize)); 209 } else { 210 ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols) 211 // convert Latin1 to UTF. We'll have to wait until load completed, but 212 // it's still faster than per-character loads+checks 213 lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1] 214 ubfx(ch1, tmp6, 8, 8); // str1[N-2] 215 ubfx(ch2, tmp6, 16, 8); // str1[N-3] 216 andr(tmp6, tmp6, 0xFF); // str1[N-4] 217 orr(ch2, ch1, ch2, LSL, 16); 218 orr(tmp6, tmp6, tmp3, LSL, 48); 219 orr(tmp6, tmp6, ch2, LSL, 16); 220 } 221 BIND(BMLOOPSTR2); 222 (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift))); 223 sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8 224 if (str1_isL == str2_isL) { 225 // re-init tmp3. It's for free because it's executed in parallel with 226 // load above. Alternative is to initialize it before loop, but it'll 227 // affect performance on in-order systems with 2 or more ld/st pipelines 228 lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size)); 229 } 230 if (!isL) { // UU/UL case 231 lsl(ch2, cnt1tmp, 1); // offset in bytes 232 } 233 cmp(tmp3, skipch); 234 br(NE, BMSKIP); 235 ldr(ch2, Address(str2, isL ? cnt1tmp : ch2)); 236 mov(ch1, tmp6); 237 if (isL) { 238 b(BMLOOPSTR1_AFTER_LOAD); 239 } else { 240 sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 241 b(BMLOOPSTR1_CMP); 242 } 243 BIND(BMLOOPSTR1); 244 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift))); 245 (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift))); 246 BIND(BMLOOPSTR1_AFTER_LOAD); 247 subs(cnt1tmp, cnt1tmp, 1); 248 br(LT, BMLOOPSTR1_LASTCMP); 249 BIND(BMLOOPSTR1_CMP); 250 cmp(ch1, ch2); 251 br(EQ, BMLOOPSTR1); 252 BIND(BMSKIP); 253 if (!isL) { 254 // if we've met UTF symbol while searching Latin1 pattern, then we can 255 // skip cnt1 symbols 256 if (str1_isL != str2_isL) { 257 mov(result_tmp, cnt1); 258 } else { 259 mov(result_tmp, 1); 260 } 261 subs(zr, skipch, ASIZE); 262 br(HS, BMADV); 263 } 264 ldrb(result_tmp, Address(sp, skipch)); // load skip distance 265 BIND(BMADV); 266 sub(cnt1tmp, cnt1, 1); 267 add(str2, str2, result_tmp, LSL, str2_chr_shift); 268 cmp(str2, str2end); 269 br(LE, BMLOOPSTR2); 270 add(sp, sp, ASIZE); 271 b(NOMATCH); 272 BIND(BMLOOPSTR1_LASTCMP); 273 cmp(ch1, ch2); 274 br(NE, BMSKIP); 275 BIND(BMMATCH); 276 sub(result, str2, tmp5); 277 if (!str2_isL) lsr(result, result, 1); 278 add(sp, sp, ASIZE); 279 b(DONE); 280 281 BIND(LINEARSTUB); 282 cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm 283 br(LT, LINEAR_MEDIUM); 284 mov(result, zr); 285 RuntimeAddress stub = NULL; 286 if (isL) { 287 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll()); 288 assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); 289 } else if (str1_isL) { 290 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul()); 291 assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); 292 } else { 293 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu()); 294 assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); 295 } 296 trampoline_call(stub); 297 b(DONE); 298 } 299 300 BIND(LINEARSEARCH); 301 { 302 Label DO1, DO2, DO3; 303 304 Register str2tmp = tmp2; 305 Register first = tmp3; 306 307 if (icnt1 == -1) 308 { 309 Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; 310 311 cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2)); 312 br(LT, DOSHORT); 313 BIND(LINEAR_MEDIUM); 314 (this->*str1_load_1chr)(first, Address(str1)); 315 lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift))); 316 sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift); 317 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 318 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 319 320 BIND(FIRST_LOOP); 321 (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg)); 322 cmp(first, ch2); 323 br(EQ, STR1_LOOP); 324 BIND(STR2_NEXT); 325 adds(cnt2_neg, cnt2_neg, str2_chr_size); 326 br(LE, FIRST_LOOP); 327 b(NOMATCH); 328 329 BIND(STR1_LOOP); 330 adds(cnt1tmp, cnt1_neg, str1_chr_size); 331 add(cnt2tmp, cnt2_neg, str2_chr_size); 332 br(GE, MATCH); 333 334 BIND(STR1_NEXT); 335 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp)); 336 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp)); 337 cmp(ch1, ch2); 338 br(NE, STR2_NEXT); 339 adds(cnt1tmp, cnt1tmp, str1_chr_size); 340 add(cnt2tmp, cnt2tmp, str2_chr_size); 341 br(LT, STR1_NEXT); 342 b(MATCH); 343 344 BIND(DOSHORT); 345 if (str1_isL == str2_isL) { 346 cmp(cnt1, (u1)2); 347 br(LT, DO1); 348 br(GT, DO3); 349 } 350 } 351 352 if (icnt1 == 4) { 353 Label CH1_LOOP; 354 355 (this->*load_4chr)(ch1, str1); 356 sub(result_tmp, cnt2, 4); 357 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 358 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 359 360 BIND(CH1_LOOP); 361 (this->*load_4chr)(ch2, Address(str2, cnt2_neg)); 362 cmp(ch1, ch2); 363 br(EQ, MATCH); 364 adds(cnt2_neg, cnt2_neg, str2_chr_size); 365 br(LE, CH1_LOOP); 366 b(NOMATCH); 367 } 368 369 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) { 370 Label CH1_LOOP; 371 372 BIND(DO2); 373 (this->*load_2chr)(ch1, str1); 374 if (icnt1 == 2) { 375 sub(result_tmp, cnt2, 2); 376 } 377 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 378 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 379 BIND(CH1_LOOP); 380 (this->*load_2chr)(ch2, Address(str2, cnt2_neg)); 381 cmp(ch1, ch2); 382 br(EQ, MATCH); 383 adds(cnt2_neg, cnt2_neg, str2_chr_size); 384 br(LE, CH1_LOOP); 385 b(NOMATCH); 386 } 387 388 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) { 389 Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; 390 391 BIND(DO3); 392 (this->*load_2chr)(first, str1); 393 (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size)); 394 if (icnt1 == 3) { 395 sub(result_tmp, cnt2, 3); 396 } 397 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 398 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 399 BIND(FIRST_LOOP); 400 (this->*load_2chr)(ch2, Address(str2, cnt2_neg)); 401 cmpw(first, ch2); 402 br(EQ, STR1_LOOP); 403 BIND(STR2_NEXT); 404 adds(cnt2_neg, cnt2_neg, str2_chr_size); 405 br(LE, FIRST_LOOP); 406 b(NOMATCH); 407 408 BIND(STR1_LOOP); 409 add(cnt2tmp, cnt2_neg, 2*str2_chr_size); 410 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp)); 411 cmp(ch1, ch2); 412 br(NE, STR2_NEXT); 413 b(MATCH); 414 } 415 416 if (icnt1 == -1 || icnt1 == 1) { 417 Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP; 418 419 BIND(DO1); 420 (this->*str1_load_1chr)(ch1, str1); 421 cmp(cnt2, (u1)8); 422 br(LT, DO1_SHORT); 423 424 sub(result_tmp, cnt2, 8/str2_chr_size); 425 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 426 mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001); 427 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 428 429 if (str2_isL) { 430 orr(ch1, ch1, ch1, LSL, 8); 431 } 432 orr(ch1, ch1, ch1, LSL, 16); 433 orr(ch1, ch1, ch1, LSL, 32); 434 BIND(CH1_LOOP); 435 ldr(ch2, Address(str2, cnt2_neg)); 436 eor(ch2, ch1, ch2); 437 sub(tmp1, ch2, tmp3); 438 orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); 439 bics(tmp1, tmp1, tmp2); 440 br(NE, HAS_ZERO); 441 adds(cnt2_neg, cnt2_neg, 8); 442 br(LT, CH1_LOOP); 443 444 cmp(cnt2_neg, (u1)8); 445 mov(cnt2_neg, 0); 446 br(LT, CH1_LOOP); 447 b(NOMATCH); 448 449 BIND(HAS_ZERO); 450 rev(tmp1, tmp1); 451 clz(tmp1, tmp1); 452 add(cnt2_neg, cnt2_neg, tmp1, LSR, 3); 453 b(MATCH); 454 455 BIND(DO1_SHORT); 456 mov(result_tmp, cnt2); 457 lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift))); 458 sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift); 459 BIND(DO1_LOOP); 460 (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg)); 461 cmpw(ch1, ch2); 462 br(EQ, MATCH); 463 adds(cnt2_neg, cnt2_neg, str2_chr_size); 464 br(LT, DO1_LOOP); 465 } 466 } 467 BIND(NOMATCH); 468 mov(result, -1); 469 b(DONE); 470 BIND(MATCH); 471 add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift); 472 BIND(DONE); 473 } 474 475 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); 476 typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn); 477 478 void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, 479 Register ch, Register result, 480 Register tmp1, Register tmp2, Register tmp3) 481 { 482 Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; 483 Register cnt1_neg = cnt1; 484 Register ch1 = rscratch1; 485 Register result_tmp = rscratch2; 486 487 cbz(cnt1, NOMATCH); 488 489 cmp(cnt1, (u1)4); 490 br(LT, DO1_SHORT); 491 492 orr(ch, ch, ch, LSL, 16); 493 orr(ch, ch, ch, LSL, 32); 494 495 sub(cnt1, cnt1, 4); 496 mov(result_tmp, cnt1); 497 lea(str1, Address(str1, cnt1, Address::uxtw(1))); 498 sub(cnt1_neg, zr, cnt1, LSL, 1); 499 500 mov(tmp3, 0x0001000100010001); 501 502 BIND(CH1_LOOP); 503 ldr(ch1, Address(str1, cnt1_neg)); 504 eor(ch1, ch, ch1); 505 sub(tmp1, ch1, tmp3); 506 orr(tmp2, ch1, 0x7fff7fff7fff7fff); 507 bics(tmp1, tmp1, tmp2); 508 br(NE, HAS_ZERO); 509 adds(cnt1_neg, cnt1_neg, 8); 510 br(LT, CH1_LOOP); 511 512 cmp(cnt1_neg, (u1)8); 513 mov(cnt1_neg, 0); 514 br(LT, CH1_LOOP); 515 b(NOMATCH); 516 517 BIND(HAS_ZERO); 518 rev(tmp1, tmp1); 519 clz(tmp1, tmp1); 520 add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); 521 b(MATCH); 522 523 BIND(DO1_SHORT); 524 mov(result_tmp, cnt1); 525 lea(str1, Address(str1, cnt1, Address::uxtw(1))); 526 sub(cnt1_neg, zr, cnt1, LSL, 1); 527 BIND(DO1_LOOP); 528 ldrh(ch1, Address(str1, cnt1_neg)); 529 cmpw(ch, ch1); 530 br(EQ, MATCH); 531 adds(cnt1_neg, cnt1_neg, 2); 532 br(LT, DO1_LOOP); 533 BIND(NOMATCH); 534 mov(result, -1); 535 b(DONE); 536 BIND(MATCH); 537 add(result, result_tmp, cnt1_neg, ASR, 1); 538 BIND(DONE); 539 } 540 541 // Compare strings. 542 void C2_MacroAssembler::string_compare(Register str1, Register str2, 543 Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, 544 FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) { 545 Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, 546 DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, 547 SHORT_LOOP_START, TAIL_CHECK; 548 549 bool isLL = ae == StrIntrinsicNode::LL; 550 bool isLU = ae == StrIntrinsicNode::LU; 551 bool isUL = ae == StrIntrinsicNode::UL; 552 553 // The stub threshold for LL strings is: 72 (64 + 8) chars 554 // UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch) 555 // LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least) 556 const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36); 557 558 bool str1_isL = isLL || isLU; 559 bool str2_isL = isLL || isUL; 560 561 int str1_chr_shift = str1_isL ? 0 : 1; 562 int str2_chr_shift = str2_isL ? 0 : 1; 563 int str1_chr_size = str1_isL ? 1 : 2; 564 int str2_chr_size = str2_isL ? 1 : 2; 565 int minCharsInWord = isLL ? wordSize : wordSize/2; 566 567 FloatRegister vtmpZ = vtmp1, vtmp = vtmp2; 568 chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb : 569 (chr_insn)&MacroAssembler::ldrh; 570 chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb : 571 (chr_insn)&MacroAssembler::ldrh; 572 uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw : 573 (uxt_insn)&MacroAssembler::uxthw; 574 575 BLOCK_COMMENT("string_compare {"); 576 577 // Bizzarely, the counts are passed in bytes, regardless of whether they 578 // are L or U strings, however the result is always in characters. 579 if (!str1_isL) asrw(cnt1, cnt1, 1); 580 if (!str2_isL) asrw(cnt2, cnt2, 1); 581 582 // Compute the minimum of the string lengths and save the difference. 583 subsw(result, cnt1, cnt2); 584 cselw(cnt2, cnt1, cnt2, Assembler::LE); // min 585 586 // A very short string 587 cmpw(cnt2, minCharsInWord); 588 br(Assembler::LE, SHORT_STRING); 589 590 // Compare longwords 591 // load first parts of strings and finish initialization while loading 592 { 593 if (str1_isL == str2_isL) { // LL or UU 594 ldr(tmp1, Address(str1)); 595 cmp(str1, str2); 596 br(Assembler::EQ, DONE); 597 ldr(tmp2, Address(str2)); 598 cmp(cnt2, stub_threshold); 599 br(GE, STUB); 600 subsw(cnt2, cnt2, minCharsInWord); 601 br(EQ, TAIL_CHECK); 602 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift))); 603 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift))); 604 sub(cnt2, zr, cnt2, LSL, str2_chr_shift); 605 } else if (isLU) { 606 ldrs(vtmp, Address(str1)); 607 ldr(tmp2, Address(str2)); 608 cmp(cnt2, stub_threshold); 609 br(GE, STUB); 610 subw(cnt2, cnt2, 4); 611 eor(vtmpZ, T16B, vtmpZ, vtmpZ); 612 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift))); 613 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift))); 614 zip1(vtmp, T8B, vtmp, vtmpZ); 615 sub(cnt1, zr, cnt2, LSL, str1_chr_shift); 616 sub(cnt2, zr, cnt2, LSL, str2_chr_shift); 617 add(cnt1, cnt1, 4); 618 fmovd(tmp1, vtmp); 619 } else { // UL case 620 ldr(tmp1, Address(str1)); 621 ldrs(vtmp, Address(str2)); 622 cmp(cnt2, stub_threshold); 623 br(GE, STUB); 624 subw(cnt2, cnt2, 4); 625 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift))); 626 eor(vtmpZ, T16B, vtmpZ, vtmpZ); 627 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift))); 628 sub(cnt1, zr, cnt2, LSL, str1_chr_shift); 629 zip1(vtmp, T8B, vtmp, vtmpZ); 630 sub(cnt2, zr, cnt2, LSL, str2_chr_shift); 631 add(cnt1, cnt1, 8); 632 fmovd(tmp2, vtmp); 633 } 634 adds(cnt2, cnt2, isUL ? 4 : 8); 635 br(GE, TAIL); 636 eor(rscratch2, tmp1, tmp2); 637 cbnz(rscratch2, DIFF); 638 // main loop 639 bind(NEXT_WORD); 640 if (str1_isL == str2_isL) { 641 ldr(tmp1, Address(str1, cnt2)); 642 ldr(tmp2, Address(str2, cnt2)); 643 adds(cnt2, cnt2, 8); 644 } else if (isLU) { 645 ldrs(vtmp, Address(str1, cnt1)); 646 ldr(tmp2, Address(str2, cnt2)); 647 add(cnt1, cnt1, 4); 648 zip1(vtmp, T8B, vtmp, vtmpZ); 649 fmovd(tmp1, vtmp); 650 adds(cnt2, cnt2, 8); 651 } else { // UL 652 ldrs(vtmp, Address(str2, cnt2)); 653 ldr(tmp1, Address(str1, cnt1)); 654 zip1(vtmp, T8B, vtmp, vtmpZ); 655 add(cnt1, cnt1, 8); 656 fmovd(tmp2, vtmp); 657 adds(cnt2, cnt2, 4); 658 } 659 br(GE, TAIL); 660 661 eor(rscratch2, tmp1, tmp2); 662 cbz(rscratch2, NEXT_WORD); 663 b(DIFF); 664 bind(TAIL); 665 eor(rscratch2, tmp1, tmp2); 666 cbnz(rscratch2, DIFF); 667 // Last longword. In the case where length == 4 we compare the 668 // same longword twice, but that's still faster than another 669 // conditional branch. 670 if (str1_isL == str2_isL) { 671 ldr(tmp1, Address(str1)); 672 ldr(tmp2, Address(str2)); 673 } else if (isLU) { 674 ldrs(vtmp, Address(str1)); 675 ldr(tmp2, Address(str2)); 676 zip1(vtmp, T8B, vtmp, vtmpZ); 677 fmovd(tmp1, vtmp); 678 } else { // UL 679 ldrs(vtmp, Address(str2)); 680 ldr(tmp1, Address(str1)); 681 zip1(vtmp, T8B, vtmp, vtmpZ); 682 fmovd(tmp2, vtmp); 683 } 684 bind(TAIL_CHECK); 685 eor(rscratch2, tmp1, tmp2); 686 cbz(rscratch2, DONE); 687 688 // Find the first different characters in the longwords and 689 // compute their difference. 690 bind(DIFF); 691 rev(rscratch2, rscratch2); 692 clz(rscratch2, rscratch2); 693 andr(rscratch2, rscratch2, isLL ? -8 : -16); 694 lsrv(tmp1, tmp1, rscratch2); 695 (this->*ext_chr)(tmp1, tmp1); 696 lsrv(tmp2, tmp2, rscratch2); 697 (this->*ext_chr)(tmp2, tmp2); 698 subw(result, tmp1, tmp2); 699 b(DONE); 700 } 701 702 bind(STUB); 703 RuntimeAddress stub = NULL; 704 switch(ae) { 705 case StrIntrinsicNode::LL: 706 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL()); 707 break; 708 case StrIntrinsicNode::UU: 709 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU()); 710 break; 711 case StrIntrinsicNode::LU: 712 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU()); 713 break; 714 case StrIntrinsicNode::UL: 715 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL()); 716 break; 717 default: 718 ShouldNotReachHere(); 719 } 720 assert(stub.target() != NULL, "compare_long_string stub has not been generated"); 721 trampoline_call(stub); 722 b(DONE); 723 724 bind(SHORT_STRING); 725 // Is the minimum length zero? 726 cbz(cnt2, DONE); 727 // arrange code to do most branches while loading and loading next characters 728 // while comparing previous 729 (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size))); 730 subs(cnt2, cnt2, 1); 731 br(EQ, SHORT_LAST_INIT); 732 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size))); 733 b(SHORT_LOOP_START); 734 bind(SHORT_LOOP); 735 subs(cnt2, cnt2, 1); 736 br(EQ, SHORT_LAST); 737 bind(SHORT_LOOP_START); 738 (this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size))); 739 (this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size))); 740 cmp(tmp1, cnt1); 741 br(NE, SHORT_LOOP_TAIL); 742 subs(cnt2, cnt2, 1); 743 br(EQ, SHORT_LAST2); 744 (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size))); 745 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size))); 746 cmp(tmp2, rscratch1); 747 br(EQ, SHORT_LOOP); 748 sub(result, tmp2, rscratch1); 749 b(DONE); 750 bind(SHORT_LOOP_TAIL); 751 sub(result, tmp1, cnt1); 752 b(DONE); 753 bind(SHORT_LAST2); 754 cmp(tmp2, rscratch1); 755 br(EQ, DONE); 756 sub(result, tmp2, rscratch1); 757 758 b(DONE); 759 bind(SHORT_LAST_INIT); 760 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size))); 761 bind(SHORT_LAST); 762 cmp(tmp1, cnt1); 763 br(EQ, DONE); 764 sub(result, tmp1, cnt1); 765 766 bind(DONE); 767 768 BLOCK_COMMENT("} string_compare"); 769 }