1 /* 2 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package org.graalvm.compiler.asm.amd64; 24 25 import static jdk.vm.ci.amd64.AMD64.rax; 26 import static jdk.vm.ci.amd64.AMD64.rcx; 27 import static jdk.vm.ci.amd64.AMD64.rdx; 28 import static jdk.vm.ci.amd64.AMD64.rsp; 29 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIncDec; 30 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmLoadAndClearUpper; 31 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmRegToRegMoveAll; 32 33 import org.graalvm.compiler.asm.Label; 34 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; 35 import org.graalvm.compiler.core.common.NumUtil; 36 37 import jdk.vm.ci.amd64.AMD64; 38 import jdk.vm.ci.amd64.AMD64Kind; 39 import jdk.vm.ci.code.Register; 40 import jdk.vm.ci.code.TargetDescription; 41 42 /** 43 * This class implements commonly used X86 code patterns. 44 */ 45 public class AMD64MacroAssembler extends AMD64Assembler { 46 47 public AMD64MacroAssembler(TargetDescription target) { 48 super(target); 49 } 50 51 public final void decrementq(Register reg, int value) { 52 if (value == Integer.MIN_VALUE) { 53 subq(reg, value); 54 return; 55 } 56 if (value < 0) { 57 incrementq(reg, -value); 58 return; 59 } 60 if (value == 0) { 61 return; 62 } 63 if (value == 1 && UseIncDec) { 64 decq(reg); 65 } else { 66 subq(reg, value); 67 } 68 } 69 70 public final void decrementq(AMD64Address dst, int value) { 71 if (value == Integer.MIN_VALUE) { 72 subq(dst, value); 73 return; 74 } 75 if (value < 0) { 76 incrementq(dst, -value); 77 return; 78 } 79 if (value == 0) { 80 return; 81 } 82 if (value == 1 && UseIncDec) { 83 decq(dst); 84 } else { 85 subq(dst, value); 86 } 87 } 88 89 public void incrementq(Register reg, int value) { 90 if (value == Integer.MIN_VALUE) { 91 addq(reg, value); 92 return; 93 } 94 if (value < 0) { 95 decrementq(reg, -value); 96 return; 97 } 98 if (value == 0) { 99 return; 100 } 101 if (value == 1 && UseIncDec) { 102 incq(reg); 103 } else { 104 addq(reg, value); 105 } 106 } 107 108 public final void incrementq(AMD64Address dst, int value) { 109 if (value == Integer.MIN_VALUE) { 110 addq(dst, value); 111 return; 112 } 113 if (value < 0) { 114 decrementq(dst, -value); 115 return; 116 } 117 if (value == 0) { 118 return; 119 } 120 if (value == 1 && UseIncDec) { 121 incq(dst); 122 } else { 123 addq(dst, value); 124 } 125 } 126 127 public final void movptr(Register dst, AMD64Address src) { 128 movq(dst, src); 129 } 130 131 public final void movptr(AMD64Address dst, Register src) { 132 movq(dst, src); 133 } 134 135 public final void movptr(AMD64Address dst, int src) { 136 movslq(dst, src); 137 } 138 139 public final void cmpptr(Register src1, Register src2) { 140 cmpq(src1, src2); 141 } 142 143 public final void cmpptr(Register src1, AMD64Address src2) { 144 cmpq(src1, src2); 145 } 146 147 public final void decrementl(Register reg) { 148 decrementl(reg, 1); 149 } 150 151 public final void decrementl(Register reg, int value) { 152 if (value == Integer.MIN_VALUE) { 153 subl(reg, value); 154 return; 155 } 156 if (value < 0) { 157 incrementl(reg, -value); 158 return; 159 } 160 if (value == 0) { 161 return; 162 } 163 if (value == 1 && UseIncDec) { 164 decl(reg); 165 } else { 166 subl(reg, value); 167 } 168 } 169 170 public final void decrementl(AMD64Address dst, int value) { 171 if (value == Integer.MIN_VALUE) { 172 subl(dst, value); 173 return; 174 } 175 if (value < 0) { 176 incrementl(dst, -value); 177 return; 178 } 179 if (value == 0) { 180 return; 181 } 182 if (value == 1 && UseIncDec) { 183 decl(dst); 184 } else { 185 subl(dst, value); 186 } 187 } 188 189 public final void incrementl(Register reg, int value) { 190 if (value == Integer.MIN_VALUE) { 191 addl(reg, value); 192 return; 193 } 194 if (value < 0) { 195 decrementl(reg, -value); 196 return; 197 } 198 if (value == 0) { 199 return; 200 } 201 if (value == 1 && UseIncDec) { 202 incl(reg); 203 } else { 204 addl(reg, value); 205 } 206 } 207 208 public final void incrementl(AMD64Address dst, int value) { 209 if (value == Integer.MIN_VALUE) { 210 addl(dst, value); 211 return; 212 } 213 if (value < 0) { 214 decrementl(dst, -value); 215 return; 216 } 217 if (value == 0) { 218 return; 219 } 220 if (value == 1 && UseIncDec) { 221 incl(dst); 222 } else { 223 addl(dst, value); 224 } 225 } 226 227 public void movflt(Register dst, Register src) { 228 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 229 if (UseXmmRegToRegMoveAll) { 230 movaps(dst, src); 231 } else { 232 movss(dst, src); 233 } 234 } 235 236 public void movflt(Register dst, AMD64Address src) { 237 assert dst.getRegisterCategory().equals(AMD64.XMM); 238 movss(dst, src); 239 } 240 241 public void movflt(AMD64Address dst, Register src) { 242 assert src.getRegisterCategory().equals(AMD64.XMM); 243 movss(dst, src); 244 } 245 246 public void movdbl(Register dst, Register src) { 247 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 248 if (UseXmmRegToRegMoveAll) { 249 movapd(dst, src); 250 } else { 251 movsd(dst, src); 252 } 253 } 254 255 public void movdbl(Register dst, AMD64Address src) { 256 assert dst.getRegisterCategory().equals(AMD64.XMM); 257 if (UseXmmLoadAndClearUpper) { 258 movsd(dst, src); 259 } else { 260 movlpd(dst, src); 261 } 262 } 263 264 public void movdbl(AMD64Address dst, Register src) { 265 assert src.getRegisterCategory().equals(AMD64.XMM); 266 movsd(dst, src); 267 } 268 269 /** 270 * Non-atomic write of a 64-bit constant to memory. Do not use if the address might be a 271 * volatile field! 272 */ 273 public final void movlong(AMD64Address dst, long src) { 274 if (NumUtil.isInt(src)) { 275 AMD64MIOp.MOV.emit(this, OperandSize.QWORD, dst, (int) src); 276 } else { 277 AMD64Address high = new AMD64Address(dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4); 278 movl(dst, (int) (src & 0xFFFFFFFF)); 279 movl(high, (int) (src >> 32)); 280 } 281 282 } 283 284 public final void setl(ConditionFlag cc, Register dst) { 285 setb(cc, dst); 286 movzbl(dst, dst); 287 } 288 289 public final void setq(ConditionFlag cc, Register dst) { 290 setb(cc, dst); 291 movzbq(dst, dst); 292 } 293 294 public final void flog(Register dest, Register value, boolean base10) { 295 if (base10) { 296 fldlg2(); 297 } else { 298 fldln2(); 299 } 300 AMD64Address tmp = trigPrologue(value); 301 fyl2x(); 302 trigEpilogue(dest, tmp); 303 } 304 305 public final void fsin(Register dest, Register value) { 306 AMD64Address tmp = trigPrologue(value); 307 fsin(); 308 trigEpilogue(dest, tmp); 309 } 310 311 public final void fcos(Register dest, Register value) { 312 AMD64Address tmp = trigPrologue(value); 313 fcos(); 314 trigEpilogue(dest, tmp); 315 } 316 317 public final void ftan(Register dest, Register value) { 318 AMD64Address tmp = trigPrologue(value); 319 fptan(); 320 fstp(0); // ftan pushes 1.0 in addition to the actual result, pop 321 trigEpilogue(dest, tmp); 322 } 323 324 public final void fpop() { 325 ffree(0); 326 fincstp(); 327 } 328 329 private AMD64Address trigPrologue(Register value) { 330 assert value.getRegisterCategory().equals(AMD64.XMM); 331 AMD64Address tmp = new AMD64Address(AMD64.rsp); 332 subq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes()); 333 movdbl(tmp, value); 334 fldd(tmp); 335 return tmp; 336 } 337 338 private void trigEpilogue(Register dest, AMD64Address tmp) { 339 assert dest.getRegisterCategory().equals(AMD64.XMM); 340 fstpd(tmp); 341 movdbl(dest, tmp); 342 addq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes()); 343 } 344 345 // IndexOf for constant substrings with size >= 8 chars 346 // which don't need to be loaded through stack. 347 public void stringIndexofC8(Register str1, Register str2, 348 Register cnt1, Register cnt2, 349 int intCnt2, Register result, 350 Register vec, Register tmp) { 351 // assert(UseSSE42Intrinsics, "SSE4.2 is required"); 352 353 // This method uses pcmpestri inxtruction with bound registers 354 // inputs: 355 // xmm - substring 356 // rax - substring length (elements count) 357 // mem - scanned string 358 // rdx - string length (elements count) 359 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 360 // outputs: 361 // rcx - matched index in string 362 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 363 364 Label reloadSubstr = new Label(); 365 Label scanToSubstr = new Label(); 366 Label scanSubstr = new Label(); 367 Label retFound = new Label(); 368 Label retNotFound = new Label(); 369 Label exit = new Label(); 370 Label foundSubstr = new Label(); 371 Label matchSubstrHead = new Label(); 372 Label reloadStr = new Label(); 373 Label foundCandidate = new Label(); 374 375 // Note, inline_string_indexOf() generates checks: 376 // if (substr.count > string.count) return -1; 377 // if (substr.count == 0) return 0; 378 assert intCnt2 >= 8 : "this code isused only for cnt2 >= 8 chars"; 379 380 // Load substring. 381 movdqu(vec, new AMD64Address(str2, 0)); 382 movl(cnt2, intCnt2); 383 movq(result, str1); // string addr 384 385 if (intCnt2 > 8) { 386 jmpb(scanToSubstr); 387 388 // Reload substr for rescan, this code 389 // is executed only for large substrings (> 8 chars) 390 bind(reloadSubstr); 391 movdqu(vec, new AMD64Address(str2, 0)); 392 negq(cnt2); // Jumped here with negative cnt2, convert to positive 393 394 bind(reloadStr); 395 // We came here after the beginning of the substring was 396 // matched but the rest of it was not so we need to search 397 // again. Start from the next element after the previous match. 398 399 // cnt2 is number of substring reminding elements and 400 // cnt1 is number of string reminding elements when cmp failed. 401 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 402 subl(cnt1, cnt2); 403 addl(cnt1, intCnt2); 404 movl(cnt2, intCnt2); // Now restore cnt2 405 406 decrementl(cnt1, 1); // Shift to next element 407 cmpl(cnt1, cnt2); 408 jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 409 410 addq(result, 2); 411 412 } // (int_cnt2 > 8) 413 414 // Scan string for start of substr in 16-byte vectors 415 bind(scanToSubstr); 416 pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 417 jccb(ConditionFlag.Below, foundCandidate); // CF == 1 418 subl(cnt1, 8); 419 jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string 420 cmpl(cnt1, cnt2); 421 jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 422 addq(result, 16); 423 jmpb(scanToSubstr); 424 425 // Found a potential substr 426 bind(foundCandidate); 427 // Matched whole vector if first element matched (tmp(rcx) == 0). 428 if (intCnt2 == 8) { 429 jccb(ConditionFlag.Overflow, retFound); // OF == 1 430 } else { // int_cnt2 > 8 431 jccb(ConditionFlag.Overflow, foundSubstr); 432 } 433 // After pcmpestri tmp(rcx) contains matched element index 434 // Compute start addr of substr 435 leaq(result, new AMD64Address(result, tmp, Scale.Times2, 0)); 436 437 // Make sure string is still long enough 438 subl(cnt1, tmp); 439 cmpl(cnt1, cnt2); 440 if (intCnt2 == 8) { 441 jccb(ConditionFlag.GreaterEqual, scanToSubstr); 442 } else { // int_cnt2 > 8 443 jccb(ConditionFlag.GreaterEqual, matchSubstrHead); 444 } 445 // Left less then substring. 446 447 bind(retNotFound); 448 movl(result, -1); 449 jmpb(exit); 450 451 if (intCnt2 > 8) { 452 // This code is optimized for the case when whole substring 453 // is matched if its head is matched. 454 bind(matchSubstrHead); 455 pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 456 // Reload only string if does not match 457 jccb(ConditionFlag.NoOverflow, reloadStr); // OF == 0 458 459 Label contScanSubstr = new Label(); 460 // Compare the rest of substring (> 8 chars). 461 bind(foundSubstr); 462 // First 8 chars are already matched. 463 negq(cnt2); 464 addq(cnt2, 8); 465 466 bind(scanSubstr); 467 subl(cnt1, 8); 468 cmpl(cnt2, -8); // Do not read beyond substring 469 jccb(ConditionFlag.LessEqual, contScanSubstr); 470 // Back-up strings to avoid reading beyond substring: 471 // cnt1 = cnt1 - cnt2 + 8 472 addl(cnt1, cnt2); // cnt2 is negative 473 addl(cnt1, 8); 474 movl(cnt2, 8); 475 negq(cnt2); 476 bind(contScanSubstr); 477 if (intCnt2 < 1024 * 1024 * 1024) { 478 movdqu(vec, new AMD64Address(str2, cnt2, Scale.Times2, intCnt2 * 2)); 479 pcmpestri(vec, new AMD64Address(result, cnt2, Scale.Times2, intCnt2 * 2), 0x0d); 480 } else { 481 // calculate index in register to avoid integer overflow (int_cnt2*2) 482 movl(tmp, intCnt2); 483 addq(tmp, cnt2); 484 movdqu(vec, new AMD64Address(str2, tmp, Scale.Times2, 0)); 485 pcmpestri(vec, new AMD64Address(result, tmp, Scale.Times2, 0), 0x0d); 486 } 487 // Need to reload strings pointers if not matched whole vector 488 jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 489 addq(cnt2, 8); 490 jcc(ConditionFlag.Negative, scanSubstr); 491 // Fall through if found full substring 492 493 } // (int_cnt2 > 8) 494 495 bind(retFound); 496 // Found result if we matched full small substring. 497 // Compute substr offset 498 subq(result, str1); 499 shrl(result, 1); // index 500 bind(exit); 501 502 } // string_indexofC8 503 504 // Small strings are loaded through stack if they cross page boundary. 505 public void stringIndexOf(Register str1, Register str2, 506 Register cnt1, Register cnt2, 507 int intCnt2, Register result, 508 Register vec, Register tmp, int vmPageSize) { 509 // 510 // int_cnt2 is length of small (< 8 chars) constant substring 511 // or (-1) for non constant substring in which case its length 512 // is in cnt2 register. 513 // 514 // Note, inline_string_indexOf() generates checks: 515 // if (substr.count > string.count) return -1; 516 // if (substr.count == 0) return 0; 517 // 518 assert intCnt2 == -1 || (0 < intCnt2 && intCnt2 < 8) : "should be != 0"; 519 520 // This method uses pcmpestri instruction with bound registers 521 // inputs: 522 // xmm - substring 523 // rax - substring length (elements count) 524 // mem - scanned string 525 // rdx - string length (elements count) 526 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 527 // outputs: 528 // rcx - matched index in string 529 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 530 531 Label reloadSubstr = new Label(); 532 Label scanToSubstr = new Label(); 533 Label scanSubstr = new Label(); 534 Label adjustStr = new Label(); 535 Label retFound = new Label(); 536 Label retNotFound = new Label(); 537 Label cleanup = new Label(); 538 Label foundSubstr = new Label(); 539 Label foundCandidate = new Label(); 540 541 int wordSize = 8; 542 // We don't know where these strings are located 543 // and we can't read beyond them. Load them through stack. 544 Label bigStrings = new Label(); 545 Label checkStr = new Label(); 546 Label copySubstr = new Label(); 547 Label copyStr = new Label(); 548 549 movq(tmp, rsp); // save old SP 550 551 if (intCnt2 > 0) { // small (< 8 chars) constant substring 552 if (intCnt2 == 1) { // One char 553 movzwl(result, new AMD64Address(str2, 0)); 554 movdl(vec, result); // move 32 bits 555 } else if (intCnt2 == 2) { // Two chars 556 movdl(vec, new AMD64Address(str2, 0)); // move 32 bits 557 } else if (intCnt2 == 4) { // Four chars 558 movq(vec, new AMD64Address(str2, 0)); // move 64 bits 559 } else { // cnt2 = { 3, 5, 6, 7 } 560 // Array header size is 12 bytes in 32-bit VM 561 // + 6 bytes for 3 chars == 18 bytes, 562 // enough space to load vec and shift. 563 movdqu(vec, new AMD64Address(str2, (intCnt2 * 2) - 16)); 564 psrldq(vec, 16 - (intCnt2 * 2)); 565 } 566 } else { // not constant substring 567 cmpl(cnt2, 8); 568 jccb(ConditionFlag.AboveEqual, bigStrings); // Both strings are big enough 569 570 // We can read beyond string if str+16 does not cross page boundary 571 // since heaps are aligned and mapped by pages. 572 assert vmPageSize < 1024 * 1024 * 1024 : "default page should be small"; 573 movl(result, str2); // We need only low 32 bits 574 andl(result, (vmPageSize - 1)); 575 cmpl(result, (vmPageSize - 16)); 576 jccb(ConditionFlag.BelowEqual, checkStr); 577 578 // Move small strings to stack to allow load 16 bytes into vec. 579 subq(rsp, 16); 580 int stackOffset = wordSize - 2; 581 push(cnt2); 582 583 bind(copySubstr); 584 movzwl(result, new AMD64Address(str2, cnt2, Scale.Times2, -2)); 585 movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); 586 decrementl(cnt2, 1); 587 jccb(ConditionFlag.NotZero, copySubstr); 588 589 pop(cnt2); 590 movq(str2, rsp); // New substring address 591 } // non constant 592 593 bind(checkStr); 594 cmpl(cnt1, 8); 595 jccb(ConditionFlag.AboveEqual, bigStrings); 596 597 // Check cross page boundary. 598 movl(result, str1); // We need only low 32 bits 599 andl(result, (vmPageSize - 1)); 600 cmpl(result, (vmPageSize - 16)); 601 jccb(ConditionFlag.BelowEqual, bigStrings); 602 603 subq(rsp, 16); 604 int stackOffset = -2; 605 if (intCnt2 < 0) { // not constant 606 push(cnt2); 607 stackOffset += wordSize; 608 } 609 movl(cnt2, cnt1); 610 611 bind(copyStr); 612 movzwl(result, new AMD64Address(str1, cnt2, Scale.Times2, -2)); 613 movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); 614 decrementl(cnt2, 1); 615 jccb(ConditionFlag.NotZero, copyStr); 616 617 if (intCnt2 < 0) { // not constant 618 pop(cnt2); 619 } 620 movq(str1, rsp); // New string address 621 622 bind(bigStrings); 623 // Load substring. 624 if (intCnt2 < 0) { // -1 625 movdqu(vec, new AMD64Address(str2, 0)); 626 push(cnt2); // substr count 627 push(str2); // substr addr 628 push(str1); // string addr 629 } else { 630 // Small (< 8 chars) constant substrings are loaded already. 631 movl(cnt2, intCnt2); 632 } 633 push(tmp); // original SP 634 // Finished loading 635 636 // ======================================================== 637 // Start search 638 // 639 640 movq(result, str1); // string addr 641 642 if (intCnt2 < 0) { // Only for non constant substring 643 jmpb(scanToSubstr); 644 645 // SP saved at sp+0 646 // String saved at sp+1*wordSize 647 // Substr saved at sp+2*wordSize 648 // Substr count saved at sp+3*wordSize 649 650 // Reload substr for rescan, this code 651 // is executed only for large substrings (> 8 chars) 652 bind(reloadSubstr); 653 movq(str2, new AMD64Address(rsp, 2 * wordSize)); 654 movl(cnt2, new AMD64Address(rsp, 3 * wordSize)); 655 movdqu(vec, new AMD64Address(str2, 0)); 656 // We came here after the beginning of the substring was 657 // matched but the rest of it was not so we need to search 658 // again. Start from the next element after the previous match. 659 subq(str1, result); // Restore counter 660 shrl(str1, 1); 661 addl(cnt1, str1); 662 decrementl(cnt1); // Shift to next element 663 cmpl(cnt1, cnt2); 664 jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 665 666 addq(result, 2); 667 } // non constant 668 669 // Scan string for start of substr in 16-byte vectors 670 bind(scanToSubstr); 671 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 672 pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 673 jccb(ConditionFlag.Below, foundCandidate); // CF == 1 674 subl(cnt1, 8); 675 jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string 676 cmpl(cnt1, cnt2); 677 jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 678 addq(result, 16); 679 680 bind(adjustStr); 681 cmpl(cnt1, 8); // Do not read beyond string 682 jccb(ConditionFlag.GreaterEqual, scanToSubstr); 683 // Back-up string to avoid reading beyond string. 684 leaq(result, new AMD64Address(result, cnt1, Scale.Times2, -16)); 685 movl(cnt1, 8); 686 jmpb(scanToSubstr); 687 688 // Found a potential substr 689 bind(foundCandidate); 690 // After pcmpestri tmp(rcx) contains matched element index 691 692 // Make sure string is still long enough 693 subl(cnt1, tmp); 694 cmpl(cnt1, cnt2); 695 jccb(ConditionFlag.GreaterEqual, foundSubstr); 696 // Left less then substring. 697 698 bind(retNotFound); 699 movl(result, -1); 700 jmpb(cleanup); 701 702 bind(foundSubstr); 703 // Compute start addr of substr 704 leaq(result, new AMD64Address(result, tmp, Scale.Times2)); 705 706 if (intCnt2 > 0) { // Constant substring 707 // Repeat search for small substring (< 8 chars) 708 // from new point without reloading substring. 709 // Have to check that we don't read beyond string. 710 cmpl(tmp, 8 - intCnt2); 711 jccb(ConditionFlag.Greater, adjustStr); 712 // Fall through if matched whole substring. 713 } else { // non constant 714 assert intCnt2 == -1 : "should be != 0"; 715 716 addl(tmp, cnt2); 717 // Found result if we matched whole substring. 718 cmpl(tmp, 8); 719 jccb(ConditionFlag.LessEqual, retFound); 720 721 // Repeat search for small substring (<= 8 chars) 722 // from new point 'str1' without reloading substring. 723 cmpl(cnt2, 8); 724 // Have to check that we don't read beyond string. 725 jccb(ConditionFlag.LessEqual, adjustStr); 726 727 Label checkNext = new Label(); 728 Label contScanSubstr = new Label(); 729 Label retFoundLong = new Label(); 730 // Compare the rest of substring (> 8 chars). 731 movq(str1, result); 732 733 cmpl(tmp, cnt2); 734 // First 8 chars are already matched. 735 jccb(ConditionFlag.Equal, checkNext); 736 737 bind(scanSubstr); 738 pcmpestri(vec, new AMD64Address(str1, 0), 0x0d); 739 // Need to reload strings pointers if not matched whole vector 740 jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 741 742 bind(checkNext); 743 subl(cnt2, 8); 744 jccb(ConditionFlag.LessEqual, retFoundLong); // Found full substring 745 addq(str1, 16); 746 addq(str2, 16); 747 subl(cnt1, 8); 748 cmpl(cnt2, 8); // Do not read beyond substring 749 jccb(ConditionFlag.GreaterEqual, contScanSubstr); 750 // Back-up strings to avoid reading beyond substring. 751 leaq(str2, new AMD64Address(str2, cnt2, Scale.Times2, -16)); 752 leaq(str1, new AMD64Address(str1, cnt2, Scale.Times2, -16)); 753 subl(cnt1, cnt2); 754 movl(cnt2, 8); 755 addl(cnt1, 8); 756 bind(contScanSubstr); 757 movdqu(vec, new AMD64Address(str2, 0)); 758 jmpb(scanSubstr); 759 760 bind(retFoundLong); 761 movq(str1, new AMD64Address(rsp, wordSize)); 762 } // non constant 763 764 bind(retFound); 765 // Compute substr offset 766 subq(result, str1); 767 shrl(result, 1); // index 768 769 bind(cleanup); 770 pop(rsp); // restore SP 771 772 } 773 774 }