/*
 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.rax;
import static jdk.vm.ci.amd64.AMD64.rcx;
import static jdk.vm.ci.amd64.AMD64.rdx;
import static jdk.vm.ci.amd64.AMD64.rsp;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIncDec;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmLoadAndClearUpper;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmRegToRegMoveAll;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements commonly used X86 code patterns.
 *
 * <p>
 * Every method here emits one or more instructions through the inherited {@link AMD64Assembler}
 * emitters; nothing is executed at the time of the call. The {@code increment*}/{@code decrement*}
 * families pick between inc/dec and add/sub encodings, the {@code mov*} helpers pick an SSE move
 * based on assembler options, and the {@code stringIndexof*} methods emit a complete
 * {@code pcmpestri}-based substring search.
 */
public class AMD64MacroAssembler extends AMD64Assembler {

    /**
     * Creates a macro assembler for the given target by forwarding to the {@link AMD64Assembler}
     * constructor.
     *
     * @param target the target description passed through to the base assembler
     */
    public AMD64MacroAssembler(TargetDescription target) {
        super(target);
    }

    /**
     * Emits code that subtracts the constant {@code value} from the 64-bit register {@code reg}.
     *
     * <p>
     * Emits nothing for {@code 0}, a {@code decq} for {@code 1} when {@code UseIncDec} is set, and
     * a {@code subq} otherwise. Negative values are forwarded to
     * {@link #incrementq(Register, int)} with the negated amount.
     *
     * @param reg the register to modify
     * @param value the amount to subtract
     */
    public final void decrementq(Register reg, int value) {
        // -Integer.MIN_VALUE overflows back to Integer.MIN_VALUE, so delegating to incrementq
        // would bounce between the two methods forever; emit the subtraction directly instead.
        if (value == Integer.MIN_VALUE) {
            subq(reg, value);
            return;
        }
        if (value < 0) {
            incrementq(reg, -value);
            return;
        }
        if (value == 0) {
            return;
        }
        if (value == 1 && UseIncDec) {
            decq(reg);
        } else {
            subq(reg, value);
        }
    }

    /**
     * Emits code that subtracts the constant {@code value} from the 64-bit memory operand
     * {@code dst}. Same case analysis as {@link #decrementq(Register, int)}.
     *
     * @param dst the memory operand to modify
     * @param value the amount to subtract
     */
    public final void decrementq(AMD64Address dst, int value) {
        // Negating Integer.MIN_VALUE yields Integer.MIN_VALUE again; handle it without delegating.
        if (value == Integer.MIN_VALUE) {
            subq(dst, value);
            return;
        }
        if (value < 0) {
            incrementq(dst, -value);
            return;
        }
        if (value == 0) {
            return;
        }
        if (value == 1 && UseIncDec) {
            decq(dst);
        } else {
            subq(dst, value);
        }
    }

    /**
     * Emits code that adds the constant {@code value} to the 64-bit register {@code reg}.
     *
     * <p>
     * Emits nothing for {@code 0}, an {@code incq} for {@code 1} when {@code UseIncDec} is set,
     * and an {@code addq} otherwise. Negative values are forwarded to
     * {@link #decrementq(Register, int)} with the negated amount.
     *
     * @param reg the register to modify
     * @param value the amount to add
     */
    public void incrementq(Register reg, int value) {
        // Negating Integer.MIN_VALUE yields Integer.MIN_VALUE again; handle it without delegating.
        if (value == Integer.MIN_VALUE) {
            addq(reg, value);
            return;
        }
        if (value < 0) {
            decrementq(reg, -value);
            return;
        }
        if (value == 0) {
            return;
        }
        if (value == 1 && UseIncDec) {
            incq(reg);
        } else {
            addq(reg, value);
        }
    }

    /**
     * Emits code that adds the constant {@code value} to the 64-bit memory operand {@code dst}.
     * Same case analysis as {@link #incrementq(Register, int)}.
     *
     * @param dst the memory operand to modify
     * @param value the amount to add
     */
    public final void incrementq(AMD64Address dst, int value) {
        // Negating Integer.MIN_VALUE yields Integer.MIN_VALUE again; handle it without delegating.
        if (value == Integer.MIN_VALUE) {
            addq(dst, value);
            return;
        }
        if (value < 0) {
            decrementq(dst, -value);
            return;
        }
        if (value == 0) {
            return;
        }
        if (value == 1 && UseIncDec) {
            incq(dst);
        } else {
            addq(dst, value);
        }
    }

    /**
     * Emits a pointer-sized load from {@code src} into {@code dst}; implemented as {@code movq}.
     */
    public final void movptr(Register dst, AMD64Address src) {
        movq(dst, src);
    }

    /**
     * Emits a pointer-sized store of {@code src} to {@code dst}; implemented as {@code movq}.
     */
    public final void movptr(AMD64Address dst, Register src) {
        movq(dst, src);
    }

    /**
     * Emits a pointer-sized store of the 32-bit immediate {@code src} to {@code dst}; implemented
     * as {@code movslq}.
     */
    public final void movptr(AMD64Address dst, int src) {
        movslq(dst, src);
    }

    /**
     * Emits a pointer-sized comparison of two registers; implemented as {@code cmpq}.
     */
    public final void cmpptr(Register src1, Register src2) {
        cmpq(src1, src2);
    }

    /**
     * Emits a pointer-sized comparison of a register with a memory operand; implemented as
     * {@code cmpq}.
     */
    public final void cmpptr(Register src1, AMD64Address src2) {
        cmpq(src1, src2);
    }

    /**
     * Emits code that decrements the 32-bit register {@code reg} by one. Shorthand for
     * {@link #decrementl(Register, int) decrementl(reg, 1)}.
     */
    public final void decrementl(Register reg) {
        decrementl(reg, 1);
    }

    /**
     * Emits code that subtracts the constant {@code value} from the 32-bit register {@code reg}.
     *
     * <p>
     * Emits nothing for {@code 0}, a {@code decl} for {@code 1} when {@code UseIncDec} is set, and
     * a {@code subl} otherwise. Negative values are forwarded to
     * {@link #incrementl(Register, int)} with the negated amount.
     *
     * @param reg the register to modify
     * @param value the amount to subtract
     */
    public final void decrementl(Register reg, int value) {
        // Negating Integer.MIN_VALUE yields Integer.MIN_VALUE again; handle it without delegating.
        if (value == Integer.MIN_VALUE) {
            subl(reg, value);
            return;
        }
        if (value < 0) {
            incrementl(reg, -value);
            return;
        }
        if (value == 0) {
            return;
        }
        if (value == 1 && UseIncDec) {
            decl(reg);
        } else {
            subl(reg, value);
        }
    }

    /**
     * Emits code that subtracts the constant {@code value} from the 32-bit memory operand
     * {@code dst}. Same case analysis as {@link #decrementl(Register, int)}.
     *
     * @param dst the memory operand to modify
     * @param value the amount to subtract
     */
    public final void decrementl(AMD64Address dst, int value) {
        // Negating Integer.MIN_VALUE yields Integer.MIN_VALUE again; handle it without delegating.
        if (value == Integer.MIN_VALUE) {
            subl(dst, value);
            return;
        }
        if (value < 0) {
            incrementl(dst, -value);
            return;
        }
        if (value == 0) {
            return;
        }
        if (value == 1 && UseIncDec) {
            decl(dst);
        } else {
            subl(dst, value);
        }
    }

    /**
     * Emits code that adds the constant {@code value} to the 32-bit register {@code reg}.
     *
     * <p>
     * Emits nothing for {@code 0}, an {@code incl} for {@code 1} when {@code UseIncDec} is set,
     * and an {@code addl} otherwise. Negative values are forwarded to
     * {@link #decrementl(Register, int)} with the negated amount.
     *
     * @param reg the register to modify
     * @param value the amount to add
     */
    public final void incrementl(Register reg, int value) {
        // Negating Integer.MIN_VALUE yields Integer.MIN_VALUE again; handle it without delegating.
        if (value == Integer.MIN_VALUE) {
            addl(reg, value);
            return;
        }
        if (value < 0) {
            decrementl(reg, -value);
            return;
        }
        if (value == 0) {
            return;
        }
        if (value == 1 && UseIncDec) {
            incl(reg);
        } else {
            addl(reg, value);
        }
    }

    /**
     * Emits code that adds the constant {@code value} to the 32-bit memory operand {@code dst}.
     * Same case analysis as {@link #incrementl(Register, int)}.
     *
     * @param dst the memory operand to modify
     * @param value the amount to add
     */
    public final void incrementl(AMD64Address dst, int value) {
        // Negating Integer.MIN_VALUE yields Integer.MIN_VALUE again; handle it without delegating.
        if (value == Integer.MIN_VALUE) {
            addl(dst, value);
            return;
        }
        if (value < 0) {
            decrementl(dst, -value);
            return;
        }
        if (value == 0) {
            return;
        }
        if (value == 1 && UseIncDec) {
            incl(dst);
        } else {
            addl(dst, value);
        }
    }

    /**
     * Emits a single-precision float move between two XMM registers: {@code movaps} when
     * {@code UseXmmRegToRegMoveAll} is set, {@code movss} otherwise.
     *
     * @param dst destination register; asserted to be in the XMM category
     * @param src source register; asserted to be in the XMM category
     */
    public void movflt(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        if (UseXmmRegToRegMoveAll) {
            movaps(dst, src);
        } else {
            movss(dst, src);
        }
    }

    /**
     * Emits a single-precision float load ({@code movss}) from memory into an XMM register.
     *
     * @param dst destination register; asserted to be in the XMM category
     * @param src memory operand to load from
     */
    public void movflt(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        movss(dst, src);
    }

    /**
     * Emits a single-precision float store ({@code movss}) from an XMM register to memory.
     *
     * @param dst memory operand to store to
     * @param src source register; asserted to be in the XMM category
     */
    public void movflt(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        movss(dst, src);
    }

    /**
     * Emits a double-precision move between two XMM registers: {@code movapd} when
     * {@code UseXmmRegToRegMoveAll} is set, {@code movsd} otherwise.
     *
     * @param dst destination register; asserted to be in the XMM category
     * @param src source register; asserted to be in the XMM category
     */
    public void movdbl(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        if (UseXmmRegToRegMoveAll) {
            movapd(dst, src);
        } else {
            movsd(dst, src);
        }
    }

    /**
     * Emits a double-precision load from memory into an XMM register: {@code movsd} when
     * {@code UseXmmLoadAndClearUpper} is set, {@code movlpd} otherwise.
     *
     * @param dst destination register; asserted to be in the XMM category
     * @param src memory operand to load from
     */
    public void movdbl(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        if (UseXmmLoadAndClearUpper) {
            movsd(dst, src);
        } else {
            movlpd(dst, src);
        }
    }

    /**
     * Emits a double-precision store ({@code movsd}) from an XMM register to memory.
     *
     * @param dst memory operand to store to
     * @param src source register; asserted to be in the XMM category
     */
    public void movdbl(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        movsd(dst, src);
    }

    /**
     * Non-atomic write of a 64-bit constant to memory. Do not use if the address might be a
     * volatile field!
     *
     * <p>
     * When {@code src} fits in an {@code int} a single QWORD move-immediate is emitted. Otherwise
     * two 32-bit stores are emitted: the low half to {@code dst} and the high half to the same
     * address with the displacement increased by 4.
     *
     * @param dst the memory operand to write
     * @param src the 64-bit constant to store
     */
    public final void movlong(AMD64Address dst, long src) {
        if (NumUtil.isInt(src)) {
            AMD64MIOp.MOV.emit(this, OperandSize.QWORD, dst, (int) src);
        } else {
            AMD64Address high = new AMD64Address(dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4);
            // NOTE: 0xFFFFFFFF is an int literal (-1) and is sign-extended to all ones for the
            // long '&', so the mask itself changes nothing; the (int) cast is what selects the
            // low 32 bits.
            movl(dst, (int) (src & 0xFFFFFFFF));
            movl(high, (int) (src >> 32));
        }

    }

    /**
     * Emits an x87 logarithm sequence for the double in {@code value}, leaving the result in
     * {@code dest}.
     *
     * <p>
     * Emits {@code fldlg2} (when {@code base10}) or {@code fldln2}, then loads {@code value} onto
     * the x87 stack via {@link #trigPrologue}, emits {@code fyl2x}, and moves the result to
     * {@code dest} via {@link #trigEpilogue}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the argument
     * @param base10 selects {@code fldlg2} instead of {@code fldln2} as the pushed constant
     */
    public final void flog(Register dest, Register value, boolean base10) {
        if (base10) {
            fldlg2();
        } else {
            fldln2();
        }
        AMD64Address tmp = trigPrologue(value);
        fyl2x();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits an x87 {@code fsin} on the double in {@code value}, leaving the result in
     * {@code dest}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the argument
     */
    public final void fsin(Register dest, Register value) {
        AMD64Address tmp = trigPrologue(value);
        fsin();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits an x87 {@code fcos} on the double in {@code value}, leaving the result in
     * {@code dest}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the argument
     */
    public final void fcos(Register dest, Register value) {
        AMD64Address tmp = trigPrologue(value);
        fcos();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits an x87 {@code fptan} on the double in {@code value}, leaving the result in
     * {@code dest}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the argument
     */
    public final void ftan(Register dest, Register value) {
        AMD64Address tmp = trigPrologue(value);
        fptan();
        fstp(0); // ftan pushes 1.0 in addition to the actual result, pop
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits {@code ffree(0)} followed by {@code fincstp}, i.e. discards the x87 stack top without
     * storing it.
     */
    public final void fpop() {
        ffree(0);
        fincstp();
    }

    /**
     * Emits the common prologue for the x87 helpers: reserves one double-sized slot by
     * subtracting from {@code rsp}, stores {@code value} there and loads it onto the x87 stack
     * with {@code fldd}.
     *
     * @param value XMM register holding the argument; asserted to be in the XMM category
     * @return the {@code rsp}-based address used as the transfer slot, to be handed to
     *         {@link #trigEpilogue}
     */
    private AMD64Address trigPrologue(Register value) {
        assert value.getRegisterCategory().equals(AMD64.XMM);
        // The address only names rsp as its base; because the emitted sub precedes the emitted
        // store, the store targets the slot reserved by that sub.
        AMD64Address tmp = new AMD64Address(AMD64.rsp);
        subq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes());
        movdbl(tmp, value);
        fldd(tmp);
        return tmp;
    }

    /**
     * Emits the common epilogue for the x87 helpers: pops the x87 result into the transfer slot
     * with {@code fstpd}, loads it into {@code dest} and releases the slot by adding the double
     * size back to {@code rsp}.
     *
     * @param dest XMM register receiving the result; asserted to be in the XMM category
     * @param tmp the transfer slot returned by {@link #trigPrologue}
     */
    private void trigEpilogue(Register dest, AMD64Address tmp) {
        assert dest.getRegisterCategory().equals(AMD64.XMM);
        fstpd(tmp);
        movdbl(dest, tmp);
        addq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes());
    }

    /**
     * IndexOf for constant substrings with size >= 8 chars which don't need to be loaded through
     * stack.
     *
     * <p>
     * On the not-found path the emitted code writes {@code -1} to {@code result}; on the found
     * path it writes {@code (matchAddress - str1) >> 1}, i.e. the element index of the match.
     * {@code cnt1}, {@code cnt2}, {@code tmp} and {@code vec} are overwritten by the emitted code.
     *
     * @param str1 register holding the address of the string to scan
     * @param str2 register holding the address of the substring
     * @param cnt1 register holding the string length in elements; must be {@code rdx}
     * @param cnt2 register that receives the substring length; must be {@code rax}
     * @param intCnt2 the constant substring length in elements; asserted to be >= 8
     * @param result register receiving the match index or {@code -1}
     * @param vec XMM register used to hold 16 bytes of the substring
     * @param tmp register receiving the {@code pcmpestri} index output; must be {@code rcx}
     */
    public void stringIndexofC8(Register str1, Register str2,
                    Register cnt1, Register cnt2,
                    int intCnt2, Register result,
                    Register vec, Register tmp) {
        // assert(UseSSE42Intrinsics, "SSE4.2 is required");

        // This method uses pcmpestri instruction with bound registers
        // inputs:
        // xmm - substring
        // rax - substring length (elements count)
        // mem - scanned string
        // rdx - string length (elements count)
        // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
        // outputs:
        // rcx - matched index in string
        assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";

        Label reloadSubstr = new Label();
        Label scanToSubstr = new Label();
        Label scanSubstr = new Label();
        Label retFound = new Label();
        Label retNotFound = new Label();
        Label exit = new Label();
        Label foundSubstr = new Label();
        Label matchSubstrHead = new Label();
        Label reloadStr = new Label();
        Label foundCandidate = new Label();

        // Note, inline_string_indexOf() generates checks:
        // if (substr.count > string.count) return -1;
        // if (substr.count == 0) return 0;
        assert intCnt2 >= 8 : "this code isused only for cnt2 >= 8 chars";

        // Load substring.
        movdqu(vec, new AMD64Address(str2, 0));
        movl(cnt2, intCnt2);
        movq(result, str1); // string addr

        if (intCnt2 > 8) {
            jmpb(scanToSubstr);

            // Reload substr for rescan, this code
            // is executed only for large substrings (> 8 chars)
            bind(reloadSubstr);
            movdqu(vec, new AMD64Address(str2, 0));
            negq(cnt2); // Jumped here with negative cnt2, convert to positive

            bind(reloadStr);
            // We came here after the beginning of the substring was
            // matched but the rest of it was not so we need to search
            // again. Start from the next element after the previous match.

            // cnt2 is number of substring remaining elements and
            // cnt1 is number of string remaining elements when cmp failed.
            // Restored cnt1 = cnt1 - cnt2 + int_cnt2
            subl(cnt1, cnt2);
            addl(cnt1, intCnt2);
            movl(cnt2, intCnt2); // Now restore cnt2

            decrementl(cnt1, 1); // Shift to next element
            cmpl(cnt1, cnt2);
            jccb(ConditionFlag.Negative, retNotFound); // Left less than substring

            addq(result, 2);

        } // (int_cnt2 > 8)

        // Scan string for start of substr in 16-byte vectors
        bind(scanToSubstr);
        pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
        jccb(ConditionFlag.Below, foundCandidate); // CF == 1
        subl(cnt1, 8);
        jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string
        cmpl(cnt1, cnt2);
        jccb(ConditionFlag.Negative, retNotFound); // Left less than substring
        addq(result, 16);
        jmpb(scanToSubstr);

        // Found a potential substr
        bind(foundCandidate);
        // Matched whole vector if first element matched (tmp(rcx) == 0).
        // The flags tested here are still those produced by the pcmpestri above.
        if (intCnt2 == 8) {
            jccb(ConditionFlag.Overflow, retFound); // OF == 1
        } else { // int_cnt2 > 8
            jccb(ConditionFlag.Overflow, foundSubstr);
        }
        // After pcmpestri tmp(rcx) contains matched element index
        // Compute start addr of substr
        leaq(result, new AMD64Address(result, tmp, Scale.Times2, 0));

        // Make sure string is still long enough
        subl(cnt1, tmp);
        cmpl(cnt1, cnt2);
        if (intCnt2 == 8) {
            jccb(ConditionFlag.GreaterEqual, scanToSubstr);
        } else { // int_cnt2 > 8
            jccb(ConditionFlag.GreaterEqual, matchSubstrHead);
        }
        // Left less than substring.

        bind(retNotFound);
        movl(result, -1);
        jmpb(exit);

        if (intCnt2 > 8) {
            // This code is optimized for the case when whole substring
            // is matched if its head is matched.
            bind(matchSubstrHead);
            pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
            // Reload only string if does not match
            jccb(ConditionFlag.NoOverflow, reloadStr); // OF == 0

            Label contScanSubstr = new Label();
            // Compare the rest of substring (> 8 chars).
            bind(foundSubstr);
            // First 8 chars are already matched.
            // From here on cnt2 holds a negative count that is stepped up towards zero.
            negq(cnt2);
            addq(cnt2, 8);

            bind(scanSubstr);
            subl(cnt1, 8);
            cmpl(cnt2, -8); // Do not read beyond substring
            jccb(ConditionFlag.LessEqual, contScanSubstr);
            // Back-up strings to avoid reading beyond substring:
            // cnt1 = cnt1 - cnt2 + 8
            addl(cnt1, cnt2); // cnt2 is negative
            addl(cnt1, 8);
            movl(cnt2, 8);
            negq(cnt2);
            bind(contScanSubstr);
            if (intCnt2 < 1024 * 1024 * 1024) {
                // intCnt2 * 2 fits in an int here, so it can be used as the displacement.
                movdqu(vec, new AMD64Address(str2, cnt2, Scale.Times2, intCnt2 * 2));
                pcmpestri(vec, new AMD64Address(result, cnt2, Scale.Times2, intCnt2 * 2), 0x0d);
            } else {
                // calculate index in register to avoid integer overflow (int_cnt2*2)
                movl(tmp, intCnt2);
                addq(tmp, cnt2);
                movdqu(vec, new AMD64Address(str2, tmp, Scale.Times2, 0));
                pcmpestri(vec, new AMD64Address(result, tmp, Scale.Times2, 0), 0x0d);
            }
            // Need to reload strings pointers if not matched whole vector
            jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0
            addq(cnt2, 8);
            jcc(ConditionFlag.Negative, scanSubstr);
            // Fall through if found full substring

        } // (int_cnt2 > 8)

        bind(retFound);
        // Found result if we matched full small substring.
        // Compute substr offset
        subq(result, str1);
        shrl(result, 1); // index
        bind(exit);

    } // string_indexofC8

    /**
     * IndexOf for small constant substrings and for non-constant substrings. Small strings are
     * loaded through stack if they cross page boundary.
     *
     * <p>
     * The emitted code saves {@code rsp} in {@code tmp} on entry, pushes that saved value once
     * setup is finished, and restores {@code rsp} with a final {@code pop(rsp)}. On the not-found
     * path it writes {@code -1} to {@code result}; on the found path it writes
     * {@code (matchAddress - str1) >> 1}. {@code str1}, {@code str2}, {@code cnt1}, {@code cnt2},
     * {@code tmp} and {@code vec} are overwritten along the way.
     *
     * @param str1 register holding the address of the string to scan
     * @param str2 register holding the address of the substring
     * @param cnt1 register holding the string length in elements; must be {@code rdx}
     * @param cnt2 register holding (non-constant case) or receiving (constant case) the substring
     *            length; must be {@code rax}
     * @param intCnt2 the constant substring length (1..7), or {@code -1} when the length is only
     *            known at run time in {@code cnt2}
     * @param result register receiving the match index or {@code -1}
     * @param vec XMM register used to hold 16 bytes of the substring
     * @param tmp register receiving the {@code pcmpestri} index output; must be {@code rcx}
     * @param vmPageSize page size used for the "does a 16-byte read cross a page" test; asserted
     *            to be below 2^30 on the non-constant path
     */
    public void stringIndexOf(Register str1, Register str2,
                    Register cnt1, Register cnt2,
                    int intCnt2, Register result,
                    Register vec, Register tmp, int vmPageSize) {
        //
        // int_cnt2 is length of small (< 8 chars) constant substring
        // or (-1) for non constant substring in which case its length
        // is in cnt2 register.
        //
        // Note, inline_string_indexOf() generates checks:
        // if (substr.count > string.count) return -1;
        // if (substr.count == 0) return 0;
        //
        assert intCnt2 == -1 || (0 < intCnt2 && intCnt2 < 8) : "should be != 0";

        // This method uses pcmpestri instruction with bound registers
        // inputs:
        // xmm - substring
        // rax - substring length (elements count)
        // mem - scanned string
        // rdx - string length (elements count)
        // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
        // outputs:
        // rcx - matched index in string
        assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";

        Label reloadSubstr = new Label();
        Label scanToSubstr = new Label();
        Label scanSubstr = new Label();
        Label adjustStr = new Label();
        Label retFound = new Label();
        Label retNotFound = new Label();
        Label cleanup = new Label();
        Label foundSubstr = new Label();
        Label foundCandidate = new Label();

        int wordSize = 8;
        // We don't know where these strings are located
        // and we can't read beyond them. Load them through stack.
        Label bigStrings = new Label();
        Label checkStr = new Label();
        Label copySubstr = new Label();
        Label copyStr = new Label();

        movq(tmp, rsp); // save old SP

        if (intCnt2 > 0) { // small (< 8 chars) constant substring
            if (intCnt2 == 1) { // One char
                movzwl(result, new AMD64Address(str2, 0));
                movdl(vec, result); // move 32 bits
            } else if (intCnt2 == 2) { // Two chars
                movdl(vec, new AMD64Address(str2, 0)); // move 32 bits
            } else if (intCnt2 == 4) { // Four chars
                movq(vec, new AMD64Address(str2, 0)); // move 64 bits
            } else { // cnt2 = { 3, 5, 6, 7 }
                // Array header size is 12 bytes in 32-bit VM
                // + 6 bytes for 3 chars == 18 bytes,
                // enough space to load vec and shift.
                movdqu(vec, new AMD64Address(str2, (intCnt2 * 2) - 16));
                psrldq(vec, 16 - (intCnt2 * 2));
            }
        } else { // not constant substring
            cmpl(cnt2, 8);
            jccb(ConditionFlag.AboveEqual, bigStrings); // Both strings are big enough

            // We can read beyond string if str+16 does not cross page boundary
            // since heaps are aligned and mapped by pages.
            assert vmPageSize < 1024 * 1024 * 1024 : "default page should be small";
            movl(result, str2); // We need only low 32 bits
            andl(result, (vmPageSize - 1));
            cmpl(result, (vmPageSize - 16));
            jccb(ConditionFlag.BelowEqual, checkStr);

            // Move small strings to stack to allow load 16 bytes into vec.
            subq(rsp, 16);
            // The push below moves rsp down by one word, and the copy loop indexes from cnt2
            // down to 1, hence the (wordSize - 2) displacement.
            int stackOffset = wordSize - 2;
            push(cnt2);

            bind(copySubstr);
            movzwl(result, new AMD64Address(str2, cnt2, Scale.Times2, -2));
            movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result);
            decrementl(cnt2, 1);
            jccb(ConditionFlag.NotZero, copySubstr);

            pop(cnt2);
            movq(str2, rsp); // New substring address
        } // non constant

        bind(checkStr);
        cmpl(cnt1, 8);
        jccb(ConditionFlag.AboveEqual, bigStrings);

        // Check cross page boundary.
        movl(result, str1); // We need only low 32 bits
        andl(result, (vmPageSize - 1));
        cmpl(result, (vmPageSize - 16));
        jccb(ConditionFlag.BelowEqual, bigStrings);

        subq(rsp, 16);
        int stackOffset = -2;
        if (intCnt2 < 0) { // not constant
            // cnt2 is reused as the copy-loop counter below, so preserve it across the loop.
            push(cnt2);
            stackOffset += wordSize;
        }
        movl(cnt2, cnt1);

        bind(copyStr);
        movzwl(result, new AMD64Address(str1, cnt2, Scale.Times2, -2));
        movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result);
        decrementl(cnt2, 1);
        jccb(ConditionFlag.NotZero, copyStr);

        if (intCnt2 < 0) { // not constant
            pop(cnt2);
        }
        movq(str1, rsp); // New string address

        bind(bigStrings);
        // Load substring.
        if (intCnt2 < 0) { // -1
            movdqu(vec, new AMD64Address(str2, 0));
            push(cnt2); // substr count
            push(str2); // substr addr
            push(str1); // string addr
        } else {
            // Small (< 8 chars) constant substrings are loaded already.
            movl(cnt2, intCnt2);
        }
        push(tmp); // original SP
        // Finished loading

        // ========================================================
        // Start search
        //

        movq(result, str1); // string addr

        if (intCnt2 < 0) { // Only for non constant substring
            jmpb(scanToSubstr);

            // SP saved at sp+0
            // String saved at sp+1*wordSize
            // Substr saved at sp+2*wordSize
            // Substr count saved at sp+3*wordSize

            // Reload substr for rescan, this code
            // is executed only for large substrings (> 8 chars)
            bind(reloadSubstr);
            movq(str2, new AMD64Address(rsp, 2 * wordSize));
            movl(cnt2, new AMD64Address(rsp, 3 * wordSize));
            movdqu(vec, new AMD64Address(str2, 0));
            // We came here after the beginning of the substring was
            // matched but the rest of it was not so we need to search
            // again. Start from the next element after the previous match.
            subq(str1, result); // Restore counter
            shrl(str1, 1);
            addl(cnt1, str1);
            decrementl(cnt1); // Shift to next element
            cmpl(cnt1, cnt2);
            jccb(ConditionFlag.Negative, retNotFound); // Left less than substring

            addq(result, 2);
        } // non constant

        // Scan string for start of substr in 16-byte vectors
        bind(scanToSubstr);
        assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";
        pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
        jccb(ConditionFlag.Below, foundCandidate); // CF == 1
        subl(cnt1, 8);
        jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string
        cmpl(cnt1, cnt2);
        jccb(ConditionFlag.Negative, retNotFound); // Left less than substring
        addq(result, 16);

        bind(adjustStr);
        cmpl(cnt1, 8); // Do not read beyond string
        jccb(ConditionFlag.GreaterEqual, scanToSubstr);
        // Back-up string to avoid reading beyond string.
        leaq(result, new AMD64Address(result, cnt1, Scale.Times2, -16));
        movl(cnt1, 8);
        jmpb(scanToSubstr);

        // Found a potential substr
        bind(foundCandidate);
        // After pcmpestri tmp(rcx) contains matched element index

        // Make sure string is still long enough
        subl(cnt1, tmp);
        cmpl(cnt1, cnt2);
        jccb(ConditionFlag.GreaterEqual, foundSubstr);
        // Left less than substring.

        bind(retNotFound);
        movl(result, -1);
        jmpb(cleanup);

        bind(foundSubstr);
        // Compute start addr of substr
        leaq(result, new AMD64Address(result, tmp, Scale.Times2));

        if (intCnt2 > 0) { // Constant substring
            // Repeat search for small substring (< 8 chars)
            // from new point without reloading substring.
            // Have to check that we don't read beyond string.
            cmpl(tmp, 8 - intCnt2);
            jccb(ConditionFlag.Greater, adjustStr);
            // Fall through if matched whole substring.
        } else { // non constant
            assert intCnt2 == -1 : "should be != 0";

            addl(tmp, cnt2);
            // Found result if we matched whole substring.
            cmpl(tmp, 8);
            jccb(ConditionFlag.LessEqual, retFound);

            // Repeat search for small substring (<= 8 chars)
            // from new point 'str1' without reloading substring.
            cmpl(cnt2, 8);
            // Have to check that we don't read beyond string.
            jccb(ConditionFlag.LessEqual, adjustStr);

            Label checkNext = new Label();
            Label contScanSubstr = new Label();
            Label retFoundLong = new Label();
            // Compare the rest of substring (> 8 chars).
            movq(str1, result);

            cmpl(tmp, cnt2);
            // First 8 chars are already matched.
            jccb(ConditionFlag.Equal, checkNext);

            bind(scanSubstr);
            pcmpestri(vec, new AMD64Address(str1, 0), 0x0d);
            // Need to reload strings pointers if not matched whole vector
            jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0

            bind(checkNext);
            subl(cnt2, 8);
            jccb(ConditionFlag.LessEqual, retFoundLong); // Found full substring
            addq(str1, 16);
            addq(str2, 16);
            subl(cnt1, 8);
            cmpl(cnt2, 8); // Do not read beyond substring
            jccb(ConditionFlag.GreaterEqual, contScanSubstr);
            // Back-up strings to avoid reading beyond substring.
            leaq(str2, new AMD64Address(str2, cnt2, Scale.Times2, -16));
            leaq(str1, new AMD64Address(str1, cnt2, Scale.Times2, -16));
            subl(cnt1, cnt2);
            movl(cnt2, 8);
            addl(cnt1, 8);
            bind(contScanSubstr);
            movdqu(vec, new AMD64Address(str2, 0));
            jmpb(scanSubstr);

            bind(retFoundLong);
            // str1 was used as a scan cursor above; reload the saved string address
            // (sp+1*wordSize) so the offset computation below is relative to the string start.
            movq(str1, new AMD64Address(rsp, wordSize));
        } // non constant

        bind(retFound);
        // Compute substr offset
        subq(result, str1);
        shrl(result, 1); // index

        bind(cleanup);
        pop(rsp); // restore SP

    }

}