1 /*
   2  * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 package org.graalvm.compiler.lir.amd64;
  26 
  27 import static jdk.vm.ci.amd64.AMD64.rax;
  28 import static jdk.vm.ci.amd64.AMD64.rcx;
  29 import static jdk.vm.ci.amd64.AMD64.rdx;
  30 import static jdk.vm.ci.amd64.AMD64.rsp;
  31 import static jdk.vm.ci.code.ValueUtil.asRegister;
  32 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL;
  33 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
  34 
  35 import org.graalvm.compiler.asm.Label;
  36 import org.graalvm.compiler.asm.amd64.AMD64Address;
  37 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
  38 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
  39 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
  40 import org.graalvm.compiler.lir.LIRInstructionClass;
  41 import org.graalvm.compiler.lir.Opcode;
  42 import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
  43 import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
  44 
  45 import jdk.vm.ci.amd64.AMD64;
  46 import jdk.vm.ci.amd64.AMD64.CPUFeature;
  47 import jdk.vm.ci.code.Register;
  48 import jdk.vm.ci.code.RegisterValue;
  49 import jdk.vm.ci.meta.Value;
  50 
  51 /**
  52   */
  53 @Opcode("AMD64_STRING_INDEX_OF")
  54 public final class AMD64StringIndexOfOp extends AMD64LIRInstruction {
  55     public static final LIRInstructionClass<AMD64StringIndexOfOp> TYPE = LIRInstructionClass.create(AMD64StringIndexOfOp.class);
  56 
  57     @Def({REG}) protected Value resultValue;
  58     @Alive({REG}) protected Value charPtr1Value;
  59     @Alive({REG}) protected Value charPtr2Value;
  60     @Use({REG}) protected RegisterValue cnt1Value;
  61     @Temp({REG}) protected RegisterValue cnt1ValueT;
  62     @Use({REG}) protected RegisterValue cnt2Value;
  63     @Temp({REG}) protected RegisterValue cnt2ValueT;
  64     @Temp({REG}) protected Value temp1;
  65     @Temp({REG, ILLEGAL}) protected Value vectorTemp1;
  66 
  67     private final int intCnt2;
  68 
  69     private final int vmPageSize;
  70 
  71     public AMD64StringIndexOfOp(LIRGeneratorTool tool, Value result, Value charPtr1, Value charPtr2, RegisterValue cnt1, RegisterValue cnt2, RegisterValue temp1, RegisterValue vectorTemp1,
  72                     int intCnt2, int vmPageSize) {
  73         super(TYPE);
  74         assert ((AMD64) tool.target().arch).getFeatures().contains(CPUFeature.SSE4_2);
  75         resultValue = result;
  76         charPtr1Value = charPtr1;
  77         charPtr2Value = charPtr2;
  78         /*
  79          * The count values are inputs but are also killed like temporaries so need both Use and
  80          * Temp annotations, which will only work with fixed registers.
  81          */
  82         cnt1Value = cnt1;
  83         cnt1ValueT = cnt1;
  84         cnt2Value = cnt2;
  85         cnt2ValueT = cnt2;
  86         assert asRegister(cnt1).equals(rdx) && asRegister(cnt2).equals(rax) && asRegister(temp1).equals(rcx) : "fixed register usage required";
  87 
  88         this.temp1 = temp1;
  89         this.vectorTemp1 = vectorTemp1;
  90         this.intCnt2 = intCnt2;
  91         this.vmPageSize = vmPageSize;
  92     }
  93 
  94     @Override
  95     public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
  96         Register charPtr1 = asRegister(charPtr1Value);
  97         Register charPtr2 = asRegister(charPtr2Value);
  98         Register cnt1 = asRegister(cnt1Value);
  99         Register cnt2 = asRegister(cnt2Value);
 100         Register result = asRegister(resultValue);
 101         Register vec = asRegister(vectorTemp1);
 102         Register tmp = asRegister(temp1);
 103         if (intCnt2 >= 8) {
 104             // IndexOf for constant substrings with size >= 8 chars which don't need to be loaded
 105             // through stack.
 106             stringIndexofC8(masm, charPtr1, charPtr2, cnt1, cnt2, result, vec, tmp);
 107         } else {
 108             // Small strings are loaded through stack if they cross page boundary.
 109             stringIndexOf(masm, charPtr1, charPtr2, cnt1, cnt2, result, vec, tmp);
 110         }
 111     }
 112 
 113     private void stringIndexofC8(AMD64MacroAssembler masm, Register charPtr1, Register charPtr2, Register cnt1, Register cnt2, Register result, Register vec, Register tmp) {
 114         // assert(UseSSE42Intrinsics, "SSE4.2 is required");
 115 
 116         // This method uses pcmpestri inxtruction with bound registers
 117         // inputs:
 118         // xmm - substring
 119         // rax - substring length (elements count)
 120         // mem - scanned string
 121         // rdx - string length (elements count)
 122         // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
 123         // outputs:
 124         // rcx - matched index in string
 125         assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";
 126 
 127         Label reloadSubstr = new Label();
 128         Label scanToSubstr = new Label();
 129         Label scanSubstr = new Label();
 130         Label retFound = new Label();
 131         Label retNotFound = new Label();
 132         Label exit = new Label();
 133         Label foundSubstr = new Label();
 134         Label matchSubstrHead = new Label();
 135         Label reloadStr = new Label();
 136         Label foundCandidate = new Label();
 137 
 138         // Note, inline_string_indexOf() generates checks:
 139         // if (substr.count > string.count) return -1;
 140         // if (substr.count == 0) return 0;
 141         assert intCnt2 >= 8 : "this code isused only for cnt2 >= 8 chars";
 142 
 143         // Load substring.
 144         masm.movdqu(vec, new AMD64Address(charPtr2, 0));
 145         masm.movl(cnt2, intCnt2);
 146         masm.movq(result, charPtr1); // string addr
 147 
 148         if (intCnt2 > 8) {
 149             masm.jmpb(scanToSubstr);
 150 
 151             // Reload substr for rescan, this code
 152             // is executed only for large substrings (> 8 chars)
 153             masm.bind(reloadSubstr);
 154             masm.movdqu(vec, new AMD64Address(charPtr2, 0));
 155             masm.negq(cnt2); // Jumped here with negative cnt2, convert to positive
 156 
 157             masm.bind(reloadStr);
 158             // We came here after the beginning of the substring was
 159             // matched but the rest of it was not so we need to search
 160             // again. Start from the next element after the previous match.
 161 
 162             // cnt2 is number of substring reminding elements and
 163             // cnt1 is number of string reminding elements when cmp failed.
 164             // Restored cnt1 = cnt1 - cnt2 + int_cnt2
 165             masm.subl(cnt1, cnt2);
 166             masm.addl(cnt1, intCnt2);
 167             masm.movl(cnt2, intCnt2); // Now restore cnt2
 168 
 169             masm.decrementl(cnt1, 1);     // Shift to next element
 170             masm.cmpl(cnt1, cnt2);
 171             masm.jccb(ConditionFlag.Negative, retNotFound);  // Left less then substring
 172 
 173             masm.addq(result, 2);
 174 
 175         } // (int_cnt2 > 8)
 176 
 177         // Scan string for start of substr in 16-byte vectors
 178         masm.bind(scanToSubstr);
 179         masm.pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
 180         masm.jccb(ConditionFlag.Below, foundCandidate);   // CF == 1
 181         masm.subl(cnt1, 8);
 182         masm.jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string
 183         masm.cmpl(cnt1, cnt2);
 184         masm.jccb(ConditionFlag.Negative, retNotFound);  // Left less then substring
 185         masm.addq(result, 16);
 186         masm.jmpb(scanToSubstr);
 187 
 188         // Found a potential substr
 189         masm.bind(foundCandidate);
 190         // Matched whole vector if first element matched (tmp(rcx) == 0).
 191         if (intCnt2 == 8) {
 192             masm.jccb(ConditionFlag.Overflow, retFound);    // OF == 1
 193         } else { // int_cnt2 > 8
 194             masm.jccb(ConditionFlag.Overflow, foundSubstr);
 195         }
 196         // After pcmpestri tmp(rcx) contains matched element index
 197         // Compute start addr of substr
 198         masm.leaq(result, new AMD64Address(result, tmp, Scale.Times2, 0));
 199 
 200         // Make sure string is still long enough
 201         masm.subl(cnt1, tmp);
 202         masm.cmpl(cnt1, cnt2);
 203         if (intCnt2 == 8) {
 204             masm.jccb(ConditionFlag.GreaterEqual, scanToSubstr);
 205         } else { // int_cnt2 > 8
 206             masm.jccb(ConditionFlag.GreaterEqual, matchSubstrHead);
 207         }
 208         // Left less then substring.
 209 
 210         masm.bind(retNotFound);
 211         masm.movl(result, -1);
 212         masm.jmpb(exit);
 213 
 214         if (intCnt2 > 8) {
 215             // This code is optimized for the case when whole substring
 216             // is matched if its head is matched.
 217             masm.bind(matchSubstrHead);
 218             masm.pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
 219             // Reload only string if does not match
 220             masm.jccb(ConditionFlag.NoOverflow, reloadStr); // OF == 0
 221 
 222             Label contScanSubstr = new Label();
 223             // Compare the rest of substring (> 8 chars).
 224             masm.bind(foundSubstr);
 225             // First 8 chars are already matched.
 226             masm.negq(cnt2);
 227             masm.addq(cnt2, 8);
 228 
 229             masm.bind(scanSubstr);
 230             masm.subl(cnt1, 8);
 231             masm.cmpl(cnt2, -8); // Do not read beyond substring
 232             masm.jccb(ConditionFlag.LessEqual, contScanSubstr);
 233             // Back-up strings to avoid reading beyond substring:
 234             // cnt1 = cnt1 - cnt2 + 8
 235             masm.addl(cnt1, cnt2); // cnt2 is negative
 236             masm.addl(cnt1, 8);
 237             masm.movl(cnt2, 8);
 238             masm.negq(cnt2);
 239             masm.bind(contScanSubstr);
 240             if (intCnt2 < 1024 * 1024 * 1024) {
 241                 masm.movdqu(vec, new AMD64Address(charPtr2, cnt2, Scale.Times2, intCnt2 * 2));
 242                 masm.pcmpestri(vec, new AMD64Address(result, cnt2, Scale.Times2, intCnt2 * 2), 0x0d);
 243             } else {
 244                 // calculate index in register to avoid integer overflow (int_cnt2*2)
 245                 masm.movl(tmp, intCnt2);
 246                 masm.addq(tmp, cnt2);
 247                 masm.movdqu(vec, new AMD64Address(charPtr2, tmp, Scale.Times2, 0));
 248                 masm.pcmpestri(vec, new AMD64Address(result, tmp, Scale.Times2, 0), 0x0d);
 249             }
 250             // Need to reload strings pointers if not matched whole vector
 251             masm.jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0
 252             masm.addq(cnt2, 8);
 253             masm.jcc(ConditionFlag.Negative, scanSubstr);
 254             // Fall through if found full substring
 255 
 256         } // (int_cnt2 > 8)
 257 
 258         masm.bind(retFound);
 259         // Found result if we matched full small substring.
 260         // Compute substr offset
 261         masm.subq(result, charPtr1);
 262         masm.shrl(result, 1); // index
 263         masm.bind(exit);
 264     }
 265 
 266     private void stringIndexOf(AMD64MacroAssembler masm, Register charPtr1, Register charPtr2, Register cnt1, Register cnt2, Register result, Register vec, Register tmp) {
 267         //
 268         // int_cnt2 is length of small (< 8 chars) constant substring
 269         // or (-1) for non constant substring in which case its length
 270         // is in cnt2 register.
 271         //
 272         // Note, inline_string_indexOf() generates checks:
 273         // if (substr.count > string.count) return -1;
 274         // if (substr.count == 0) return 0;
 275         //
 276         assert intCnt2 == -1 || (0 < intCnt2 && intCnt2 < 8) : "should be != 0";
 277 
 278         // This method uses pcmpestri instruction with bound registers
 279         // inputs:
 280         // xmm - substring
 281         // rax - substring length (elements count)
 282         // mem - scanned string
 283         // rdx - string length (elements count)
 284         // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
 285         // outputs:
 286         // rcx - matched index in string
 287         assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";
 288 
 289         Label reloadSubstr = new Label();
 290         Label scanToSubstr = new Label();
 291         Label scanSubstr = new Label();
 292         Label adjustStr = new Label();
 293         Label retFound = new Label();
 294         Label retNotFound = new Label();
 295         Label cleanup = new Label();
 296         Label foundSubstr = new Label();
 297         Label foundCandidate = new Label();
 298 
 299         int wordSize = 8;
 300         // We don't know where these strings are located
 301         // and we can't read beyond them. Load them through stack.
 302         Label bigStrings = new Label();
 303         Label checkStr = new Label();
 304         Label copySubstr = new Label();
 305         Label copyStr = new Label();
 306 
 307         masm.movq(tmp, rsp); // save old SP
 308 
 309         if (intCnt2 > 0) {     // small (< 8 chars) constant substring
 310             if (intCnt2 == 1) {  // One char
 311                 masm.movzwl(result, new AMD64Address(charPtr2, 0));
 312                 masm.movdl(vec, result); // move 32 bits
 313             } else if (intCnt2 == 2) { // Two chars
 314                 masm.movdl(vec, new AMD64Address(charPtr2, 0)); // move 32 bits
 315             } else if (intCnt2 == 4) { // Four chars
 316                 masm.movq(vec, new AMD64Address(charPtr2, 0));  // move 64 bits
 317             } else { // cnt2 = { 3, 5, 6, 7 }
 318                 // Array header size is 12 bytes in 32-bit VM
 319                 // + 6 bytes for 3 chars == 18 bytes,
 320                 // enough space to load vec and shift.
 321                 masm.movdqu(vec, new AMD64Address(charPtr2, (intCnt2 * 2) - 16));
 322                 masm.psrldq(vec, 16 - (intCnt2 * 2));
 323             }
 324         } else { // not constant substring
 325             masm.cmpl(cnt2, 8);
 326             masm.jccb(ConditionFlag.AboveEqual, bigStrings); // Both strings are big enough
 327 
 328             // We can read beyond string if str+16 does not cross page boundary
 329             // since heaps are aligned and mapped by pages.
 330             assert vmPageSize < 1024 * 1024 * 1024 : "default page should be small";
 331             masm.movl(result, charPtr2); // We need only low 32 bits
 332             masm.andl(result, (vmPageSize - 1));
 333             masm.cmpl(result, (vmPageSize - 16));
 334             masm.jccb(ConditionFlag.BelowEqual, checkStr);
 335 
 336             // Move small strings to stack to allow load 16 bytes into vec.
 337             masm.subq(rsp, 16);
 338             int stackOffset = wordSize - 2;
 339             masm.push(cnt2);
 340 
 341             masm.bind(copySubstr);
 342             masm.movzwl(result, new AMD64Address(charPtr2, cnt2, Scale.Times2, -2));
 343             masm.movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result);
 344             masm.decrementl(cnt2, 1);
 345             masm.jccb(ConditionFlag.NotZero, copySubstr);
 346 
 347             masm.pop(cnt2);
 348             masm.movq(charPtr2, rsp);  // New substring address
 349         } // non constant
 350 
 351         masm.bind(checkStr);
 352         masm.cmpl(cnt1, 8);
 353         masm.jccb(ConditionFlag.AboveEqual, bigStrings);
 354 
 355         // Check cross page boundary.
 356         masm.movl(result, charPtr1); // We need only low 32 bits
 357         masm.andl(result, (vmPageSize - 1));
 358         masm.cmpl(result, (vmPageSize - 16));
 359         masm.jccb(ConditionFlag.BelowEqual, bigStrings);
 360 
 361         masm.subq(rsp, 16);
 362         int stackOffset = -2;
 363         if (intCnt2 < 0) { // not constant
 364             masm.push(cnt2);
 365             stackOffset += wordSize;
 366         }
 367         masm.movl(cnt2, cnt1);
 368 
 369         masm.bind(copyStr);
 370         masm.movzwl(result, new AMD64Address(charPtr1, cnt2, Scale.Times2, -2));
 371         masm.movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result);
 372         masm.decrementl(cnt2, 1);
 373         masm.jccb(ConditionFlag.NotZero, copyStr);
 374 
 375         if (intCnt2 < 0) { // not constant
 376             masm.pop(cnt2);
 377         }
 378         masm.movq(charPtr1, rsp);  // New string address
 379 
 380         masm.bind(bigStrings);
 381         // Load substring.
 382         if (intCnt2 < 0) { // -1
 383             masm.movdqu(vec, new AMD64Address(charPtr2, 0));
 384             masm.push(cnt2);       // substr count
 385             masm.push(charPtr2);       // substr addr
 386             masm.push(charPtr1);       // string addr
 387         } else {
 388             // Small (< 8 chars) constant substrings are loaded already.
 389             masm.movl(cnt2, intCnt2);
 390         }
 391         masm.push(tmp);  // original SP
 392         // Finished loading
 393 
 394         // ========================================================
 395         // Start search
 396         //
 397 
 398         masm.movq(result, charPtr1); // string addr
 399 
 400         if (intCnt2 < 0) {  // Only for non constant substring
 401             masm.jmpb(scanToSubstr);
 402 
 403             // SP saved at sp+0
 404             // String saved at sp+1*wordSize
 405             // Substr saved at sp+2*wordSize
 406             // Substr count saved at sp+3*wordSize
 407 
 408             // Reload substr for rescan, this code
 409             // is executed only for large substrings (> 8 chars)
 410             masm.bind(reloadSubstr);
 411             masm.movq(charPtr2, new AMD64Address(rsp, 2 * wordSize));
 412             masm.movl(cnt2, new AMD64Address(rsp, 3 * wordSize));
 413             masm.movdqu(vec, new AMD64Address(charPtr2, 0));
 414             // We came here after the beginning of the substring was
 415             // matched but the rest of it was not so we need to search
 416             // again. Start from the next element after the previous match.
 417             masm.subq(charPtr1, result); // Restore counter
 418             masm.shrl(charPtr1, 1);
 419             masm.addl(cnt1, charPtr1);
 420             masm.decrementl(cnt1);   // Shift to next element
 421             masm.cmpl(cnt1, cnt2);
 422             masm.jccb(ConditionFlag.Negative, retNotFound);  // Left less then substring
 423 
 424             masm.addq(result, 2);
 425         } // non constant
 426 
 427         // Scan string for start of substr in 16-byte vectors
 428         masm.bind(scanToSubstr);
 429         assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";
 430         masm.pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
 431         masm.jccb(ConditionFlag.Below, foundCandidate);   // CF == 1
 432         masm.subl(cnt1, 8);
 433         masm.jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string
 434         masm.cmpl(cnt1, cnt2);
 435         masm.jccb(ConditionFlag.Negative, retNotFound);  // Left less then substring
 436         masm.addq(result, 16);
 437 
 438         masm.bind(adjustStr);
 439         masm.cmpl(cnt1, 8); // Do not read beyond string
 440         masm.jccb(ConditionFlag.GreaterEqual, scanToSubstr);
 441         // Back-up string to avoid reading beyond string.
 442         masm.leaq(result, new AMD64Address(result, cnt1, Scale.Times2, -16));
 443         masm.movl(cnt1, 8);
 444         masm.jmpb(scanToSubstr);
 445 
 446         // Found a potential substr
 447         masm.bind(foundCandidate);
 448         // After pcmpestri tmp(rcx) contains matched element index
 449 
 450         // Make sure string is still long enough
 451         masm.subl(cnt1, tmp);
 452         masm.cmpl(cnt1, cnt2);
 453         masm.jccb(ConditionFlag.GreaterEqual, foundSubstr);
 454         // Left less then substring.
 455 
 456         masm.bind(retNotFound);
 457         masm.movl(result, -1);
 458         masm.jmpb(cleanup);
 459 
 460         masm.bind(foundSubstr);
 461         // Compute start addr of substr
 462         masm.leaq(result, new AMD64Address(result, tmp, Scale.Times2));
 463 
 464         if (intCnt2 > 0) { // Constant substring
 465             // Repeat search for small substring (< 8 chars)
 466             // from new point without reloading substring.
 467             // Have to check that we don't read beyond string.
 468             masm.cmpl(tmp, 8 - intCnt2);
 469             masm.jccb(ConditionFlag.Greater, adjustStr);
 470             // Fall through if matched whole substring.
 471         } else { // non constant
 472             assert intCnt2 == -1 : "should be != 0";
 473             masm.addl(tmp, cnt2);
 474             // Found result if we matched whole substring.
 475             masm.cmpl(tmp, 8);
 476             masm.jccb(ConditionFlag.LessEqual, retFound);
 477 
 478             // Repeat search for small substring (<= 8 chars)
 479             // from new point 'str1' without reloading substring.
 480             masm.cmpl(cnt2, 8);
 481             // Have to check that we don't read beyond string.
 482             masm.jccb(ConditionFlag.LessEqual, adjustStr);
 483 
 484             Label checkNext = new Label();
 485             Label contScanSubstr = new Label();
 486             Label retFoundLong = new Label();
 487             // Compare the rest of substring (> 8 chars).
 488             masm.movq(charPtr1, result);
 489 
 490             masm.cmpl(tmp, cnt2);
 491             // First 8 chars are already matched.
 492             masm.jccb(ConditionFlag.Equal, checkNext);
 493 
 494             masm.bind(scanSubstr);
 495             masm.pcmpestri(vec, new AMD64Address(charPtr1, 0), 0x0d);
 496             // Need to reload strings pointers if not matched whole vector
 497             masm.jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0
 498 
 499             masm.bind(checkNext);
 500             masm.subl(cnt2, 8);
 501             masm.jccb(ConditionFlag.LessEqual, retFoundLong); // Found full substring
 502             masm.addq(charPtr1, 16);
 503             masm.addq(charPtr2, 16);
 504             masm.subl(cnt1, 8);
 505             masm.cmpl(cnt2, 8); // Do not read beyond substring
 506             masm.jccb(ConditionFlag.GreaterEqual, contScanSubstr);
 507             // Back-up strings to avoid reading beyond substring.
 508             masm.leaq(charPtr2, new AMD64Address(charPtr2, cnt2, Scale.Times2, -16));
 509             masm.leaq(charPtr1, new AMD64Address(charPtr1, cnt2, Scale.Times2, -16));
 510             masm.subl(cnt1, cnt2);
 511             masm.movl(cnt2, 8);
 512             masm.addl(cnt1, 8);
 513             masm.bind(contScanSubstr);
 514             masm.movdqu(vec, new AMD64Address(charPtr2, 0));
 515             masm.jmpb(scanSubstr);
 516 
 517             masm.bind(retFoundLong);
 518             masm.movq(charPtr1, new AMD64Address(rsp, wordSize));
 519         } // non constant
 520 
 521         masm.bind(retFound);
 522         // Compute substr offset
 523         masm.subq(result, charPtr1);
 524         masm.shrl(result, 1); // index
 525 
 526         masm.bind(cleanup);
 527         masm.pop(rsp); // restore SP
 528     }
 529 
 530 }