1 /* 2 * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 25 package org.graalvm.compiler.lir.amd64; 26 27 import static jdk.vm.ci.amd64.AMD64.rax; 28 import static jdk.vm.ci.amd64.AMD64.rcx; 29 import static jdk.vm.ci.amd64.AMD64.rdx; 30 import static jdk.vm.ci.amd64.AMD64.rsp; 31 import static jdk.vm.ci.code.ValueUtil.asRegister; 32 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; 33 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; 34 35 import org.graalvm.compiler.asm.Label; 36 import org.graalvm.compiler.asm.amd64.AMD64Address; 37 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; 38 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; 39 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; 40 import org.graalvm.compiler.lir.LIRInstructionClass; 41 import org.graalvm.compiler.lir.Opcode; 42 import org.graalvm.compiler.lir.asm.CompilationResultBuilder; 43 import org.graalvm.compiler.lir.gen.LIRGeneratorTool; 44 45 import jdk.vm.ci.amd64.AMD64; 46 import jdk.vm.ci.amd64.AMD64.CPUFeature; 47 import jdk.vm.ci.code.Register; 48 import jdk.vm.ci.code.RegisterValue; 49 import jdk.vm.ci.meta.Value; 50 51 /** 52 */ 53 @Opcode("AMD64_STRING_INDEX_OF") 54 public final class AMD64StringIndexOfOp extends AMD64LIRInstruction { 55 public static final LIRInstructionClass<AMD64StringIndexOfOp> TYPE = LIRInstructionClass.create(AMD64StringIndexOfOp.class); 56 57 @Def({REG}) protected Value resultValue; 58 @Alive({REG}) protected Value charPtr1Value; 59 @Alive({REG}) protected Value charPtr2Value; 60 @Use({REG}) protected RegisterValue cnt1Value; 61 @Temp({REG}) protected RegisterValue cnt1ValueT; 62 @Use({REG}) protected RegisterValue cnt2Value; 63 @Temp({REG}) protected RegisterValue cnt2ValueT; 64 @Temp({REG}) protected Value temp1; 65 @Temp({REG, ILLEGAL}) protected Value vectorTemp1; 66 67 private final int intCnt2; 68 69 private final int vmPageSize; 70 71 public AMD64StringIndexOfOp(LIRGeneratorTool tool, Value result, Value charPtr1, Value charPtr2, RegisterValue cnt1, RegisterValue cnt2, RegisterValue temp1, RegisterValue vectorTemp1, 72 int intCnt2, int vmPageSize) { 73 super(TYPE); 74 assert ((AMD64) tool.target().arch).getFeatures().contains(CPUFeature.SSE4_2); 75 resultValue = result; 76 charPtr1Value = charPtr1; 77 charPtr2Value = charPtr2; 78 /* 79 * The count values are inputs but are also killed like temporaries so need both Use and 80 * Temp annotations, which will only work with fixed registers. 81 */ 82 cnt1Value = cnt1; 83 cnt1ValueT = cnt1; 84 cnt2Value = cnt2; 85 cnt2ValueT = cnt2; 86 assert asRegister(cnt1).equals(rdx) && asRegister(cnt2).equals(rax) && asRegister(temp1).equals(rcx) : "fixed register usage required"; 87 88 this.temp1 = temp1; 89 this.vectorTemp1 = vectorTemp1; 90 this.intCnt2 = intCnt2; 91 this.vmPageSize = vmPageSize; 92 } 93 94 @Override 95 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { 96 Register charPtr1 = asRegister(charPtr1Value); 97 Register charPtr2 = asRegister(charPtr2Value); 98 Register cnt1 = asRegister(cnt1Value); 99 Register cnt2 = asRegister(cnt2Value); 100 Register result = asRegister(resultValue); 101 Register vec = asRegister(vectorTemp1); 102 Register tmp = asRegister(temp1); 103 if (intCnt2 >= 8) { 104 // IndexOf for constant substrings with size >= 8 chars which don't need to be loaded 105 // through stack. 106 stringIndexofC8(masm, charPtr1, charPtr2, cnt1, cnt2, result, vec, tmp); 107 } else { 108 // Small strings are loaded through stack if they cross page boundary. 109 stringIndexOf(masm, charPtr1, charPtr2, cnt1, cnt2, result, vec, tmp); 110 } 111 } 112 113 private void stringIndexofC8(AMD64MacroAssembler masm, Register charPtr1, Register charPtr2, Register cnt1, Register cnt2, Register result, Register vec, Register tmp) { 114 // assert(UseSSE42Intrinsics, "SSE4.2 is required"); 115 116 // This method uses pcmpestri inxtruction with bound registers 117 // inputs: 118 // xmm - substring 119 // rax - substring length (elements count) 120 // mem - scanned string 121 // rdx - string length (elements count) 122 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 123 // outputs: 124 // rcx - matched index in string 125 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 126 127 Label reloadSubstr = new Label(); 128 Label scanToSubstr = new Label(); 129 Label scanSubstr = new Label(); 130 Label retFound = new Label(); 131 Label retNotFound = new Label(); 132 Label exit = new Label(); 133 Label foundSubstr = new Label(); 134 Label matchSubstrHead = new Label(); 135 Label reloadStr = new Label(); 136 Label foundCandidate = new Label(); 137 138 // Note, inline_string_indexOf() generates checks: 139 // if (substr.count > string.count) return -1; 140 // if (substr.count == 0) return 0; 141 assert intCnt2 >= 8 : "this code isused only for cnt2 >= 8 chars"; 142 143 // Load substring. 144 masm.movdqu(vec, new AMD64Address(charPtr2, 0)); 145 masm.movl(cnt2, intCnt2); 146 masm.movq(result, charPtr1); // string addr 147 148 if (intCnt2 > 8) { 149 masm.jmpb(scanToSubstr); 150 151 // Reload substr for rescan, this code 152 // is executed only for large substrings (> 8 chars) 153 masm.bind(reloadSubstr); 154 masm.movdqu(vec, new AMD64Address(charPtr2, 0)); 155 masm.negq(cnt2); // Jumped here with negative cnt2, convert to positive 156 157 masm.bind(reloadStr); 158 // We came here after the beginning of the substring was 159 // matched but the rest of it was not so we need to search 160 // again. Start from the next element after the previous match. 161 162 // cnt2 is number of substring reminding elements and 163 // cnt1 is number of string reminding elements when cmp failed. 164 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 165 masm.subl(cnt1, cnt2); 166 masm.addl(cnt1, intCnt2); 167 masm.movl(cnt2, intCnt2); // Now restore cnt2 168 169 masm.decrementl(cnt1, 1); // Shift to next element 170 masm.cmpl(cnt1, cnt2); 171 masm.jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 172 173 masm.addq(result, 2); 174 175 } // (int_cnt2 > 8) 176 177 // Scan string for start of substr in 16-byte vectors 178 masm.bind(scanToSubstr); 179 masm.pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 180 masm.jccb(ConditionFlag.Below, foundCandidate); // CF == 1 181 masm.subl(cnt1, 8); 182 masm.jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string 183 masm.cmpl(cnt1, cnt2); 184 masm.jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 185 masm.addq(result, 16); 186 masm.jmpb(scanToSubstr); 187 188 // Found a potential substr 189 masm.bind(foundCandidate); 190 // Matched whole vector if first element matched (tmp(rcx) == 0). 191 if (intCnt2 == 8) { 192 masm.jccb(ConditionFlag.Overflow, retFound); // OF == 1 193 } else { // int_cnt2 > 8 194 masm.jccb(ConditionFlag.Overflow, foundSubstr); 195 } 196 // After pcmpestri tmp(rcx) contains matched element index 197 // Compute start addr of substr 198 masm.leaq(result, new AMD64Address(result, tmp, Scale.Times2, 0)); 199 200 // Make sure string is still long enough 201 masm.subl(cnt1, tmp); 202 masm.cmpl(cnt1, cnt2); 203 if (intCnt2 == 8) { 204 masm.jccb(ConditionFlag.GreaterEqual, scanToSubstr); 205 } else { // int_cnt2 > 8 206 masm.jccb(ConditionFlag.GreaterEqual, matchSubstrHead); 207 } 208 // Left less then substring. 209 210 masm.bind(retNotFound); 211 masm.movl(result, -1); 212 masm.jmpb(exit); 213 214 if (intCnt2 > 8) { 215 // This code is optimized for the case when whole substring 216 // is matched if its head is matched. 217 masm.bind(matchSubstrHead); 218 masm.pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 219 // Reload only string if does not match 220 masm.jccb(ConditionFlag.NoOverflow, reloadStr); // OF == 0 221 222 Label contScanSubstr = new Label(); 223 // Compare the rest of substring (> 8 chars). 224 masm.bind(foundSubstr); 225 // First 8 chars are already matched. 226 masm.negq(cnt2); 227 masm.addq(cnt2, 8); 228 229 masm.bind(scanSubstr); 230 masm.subl(cnt1, 8); 231 masm.cmpl(cnt2, -8); // Do not read beyond substring 232 masm.jccb(ConditionFlag.LessEqual, contScanSubstr); 233 // Back-up strings to avoid reading beyond substring: 234 // cnt1 = cnt1 - cnt2 + 8 235 masm.addl(cnt1, cnt2); // cnt2 is negative 236 masm.addl(cnt1, 8); 237 masm.movl(cnt2, 8); 238 masm.negq(cnt2); 239 masm.bind(contScanSubstr); 240 if (intCnt2 < 1024 * 1024 * 1024) { 241 masm.movdqu(vec, new AMD64Address(charPtr2, cnt2, Scale.Times2, intCnt2 * 2)); 242 masm.pcmpestri(vec, new AMD64Address(result, cnt2, Scale.Times2, intCnt2 * 2), 0x0d); 243 } else { 244 // calculate index in register to avoid integer overflow (int_cnt2*2) 245 masm.movl(tmp, intCnt2); 246 masm.addq(tmp, cnt2); 247 masm.movdqu(vec, new AMD64Address(charPtr2, tmp, Scale.Times2, 0)); 248 masm.pcmpestri(vec, new AMD64Address(result, tmp, Scale.Times2, 0), 0x0d); 249 } 250 // Need to reload strings pointers if not matched whole vector 251 masm.jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 252 masm.addq(cnt2, 8); 253 masm.jcc(ConditionFlag.Negative, scanSubstr); 254 // Fall through if found full substring 255 256 } // (int_cnt2 > 8) 257 258 masm.bind(retFound); 259 // Found result if we matched full small substring. 260 // Compute substr offset 261 masm.subq(result, charPtr1); 262 masm.shrl(result, 1); // index 263 masm.bind(exit); 264 } 265 266 private void stringIndexOf(AMD64MacroAssembler masm, Register charPtr1, Register charPtr2, Register cnt1, Register cnt2, Register result, Register vec, Register tmp) { 267 // 268 // int_cnt2 is length of small (< 8 chars) constant substring 269 // or (-1) for non constant substring in which case its length 270 // is in cnt2 register. 271 // 272 // Note, inline_string_indexOf() generates checks: 273 // if (substr.count > string.count) return -1; 274 // if (substr.count == 0) return 0; 275 // 276 assert intCnt2 == -1 || (0 < intCnt2 && intCnt2 < 8) : "should be != 0"; 277 278 // This method uses pcmpestri instruction with bound registers 279 // inputs: 280 // xmm - substring 281 // rax - substring length (elements count) 282 // mem - scanned string 283 // rdx - string length (elements count) 284 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 285 // outputs: 286 // rcx - matched index in string 287 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 288 289 Label reloadSubstr = new Label(); 290 Label scanToSubstr = new Label(); 291 Label scanSubstr = new Label(); 292 Label adjustStr = new Label(); 293 Label retFound = new Label(); 294 Label retNotFound = new Label(); 295 Label cleanup = new Label(); 296 Label foundSubstr = new Label(); 297 Label foundCandidate = new Label(); 298 299 int wordSize = 8; 300 // We don't know where these strings are located 301 // and we can't read beyond them. Load them through stack. 302 Label bigStrings = new Label(); 303 Label checkStr = new Label(); 304 Label copySubstr = new Label(); 305 Label copyStr = new Label(); 306 307 masm.movq(tmp, rsp); // save old SP 308 309 if (intCnt2 > 0) { // small (< 8 chars) constant substring 310 if (intCnt2 == 1) { // One char 311 masm.movzwl(result, new AMD64Address(charPtr2, 0)); 312 masm.movdl(vec, result); // move 32 bits 313 } else if (intCnt2 == 2) { // Two chars 314 masm.movdl(vec, new AMD64Address(charPtr2, 0)); // move 32 bits 315 } else if (intCnt2 == 4) { // Four chars 316 masm.movq(vec, new AMD64Address(charPtr2, 0)); // move 64 bits 317 } else { // cnt2 = { 3, 5, 6, 7 } 318 // Array header size is 12 bytes in 32-bit VM 319 // + 6 bytes for 3 chars == 18 bytes, 320 // enough space to load vec and shift. 321 masm.movdqu(vec, new AMD64Address(charPtr2, (intCnt2 * 2) - 16)); 322 masm.psrldq(vec, 16 - (intCnt2 * 2)); 323 } 324 } else { // not constant substring 325 masm.cmpl(cnt2, 8); 326 masm.jccb(ConditionFlag.AboveEqual, bigStrings); // Both strings are big enough 327 328 // We can read beyond string if str+16 does not cross page boundary 329 // since heaps are aligned and mapped by pages. 330 assert vmPageSize < 1024 * 1024 * 1024 : "default page should be small"; 331 masm.movl(result, charPtr2); // We need only low 32 bits 332 masm.andl(result, (vmPageSize - 1)); 333 masm.cmpl(result, (vmPageSize - 16)); 334 masm.jccb(ConditionFlag.BelowEqual, checkStr); 335 336 // Move small strings to stack to allow load 16 bytes into vec. 337 masm.subq(rsp, 16); 338 int stackOffset = wordSize - 2; 339 masm.push(cnt2); 340 341 masm.bind(copySubstr); 342 masm.movzwl(result, new AMD64Address(charPtr2, cnt2, Scale.Times2, -2)); 343 masm.movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); 344 masm.decrementl(cnt2, 1); 345 masm.jccb(ConditionFlag.NotZero, copySubstr); 346 347 masm.pop(cnt2); 348 masm.movq(charPtr2, rsp); // New substring address 349 } // non constant 350 351 masm.bind(checkStr); 352 masm.cmpl(cnt1, 8); 353 masm.jccb(ConditionFlag.AboveEqual, bigStrings); 354 355 // Check cross page boundary. 356 masm.movl(result, charPtr1); // We need only low 32 bits 357 masm.andl(result, (vmPageSize - 1)); 358 masm.cmpl(result, (vmPageSize - 16)); 359 masm.jccb(ConditionFlag.BelowEqual, bigStrings); 360 361 masm.subq(rsp, 16); 362 int stackOffset = -2; 363 if (intCnt2 < 0) { // not constant 364 masm.push(cnt2); 365 stackOffset += wordSize; 366 } 367 masm.movl(cnt2, cnt1); 368 369 masm.bind(copyStr); 370 masm.movzwl(result, new AMD64Address(charPtr1, cnt2, Scale.Times2, -2)); 371 masm.movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); 372 masm.decrementl(cnt2, 1); 373 masm.jccb(ConditionFlag.NotZero, copyStr); 374 375 if (intCnt2 < 0) { // not constant 376 masm.pop(cnt2); 377 } 378 masm.movq(charPtr1, rsp); // New string address 379 380 masm.bind(bigStrings); 381 // Load substring. 382 if (intCnt2 < 0) { // -1 383 masm.movdqu(vec, new AMD64Address(charPtr2, 0)); 384 masm.push(cnt2); // substr count 385 masm.push(charPtr2); // substr addr 386 masm.push(charPtr1); // string addr 387 } else { 388 // Small (< 8 chars) constant substrings are loaded already. 389 masm.movl(cnt2, intCnt2); 390 } 391 masm.push(tmp); // original SP 392 // Finished loading 393 394 // ======================================================== 395 // Start search 396 // 397 398 masm.movq(result, charPtr1); // string addr 399 400 if (intCnt2 < 0) { // Only for non constant substring 401 masm.jmpb(scanToSubstr); 402 403 // SP saved at sp+0 404 // String saved at sp+1*wordSize 405 // Substr saved at sp+2*wordSize 406 // Substr count saved at sp+3*wordSize 407 408 // Reload substr for rescan, this code 409 // is executed only for large substrings (> 8 chars) 410 masm.bind(reloadSubstr); 411 masm.movq(charPtr2, new AMD64Address(rsp, 2 * wordSize)); 412 masm.movl(cnt2, new AMD64Address(rsp, 3 * wordSize)); 413 masm.movdqu(vec, new AMD64Address(charPtr2, 0)); 414 // We came here after the beginning of the substring was 415 // matched but the rest of it was not so we need to search 416 // again. Start from the next element after the previous match. 417 masm.subq(charPtr1, result); // Restore counter 418 masm.shrl(charPtr1, 1); 419 masm.addl(cnt1, charPtr1); 420 masm.decrementl(cnt1); // Shift to next element 421 masm.cmpl(cnt1, cnt2); 422 masm.jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 423 424 masm.addq(result, 2); 425 } // non constant 426 427 // Scan string for start of substr in 16-byte vectors 428 masm.bind(scanToSubstr); 429 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 430 masm.pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 431 masm.jccb(ConditionFlag.Below, foundCandidate); // CF == 1 432 masm.subl(cnt1, 8); 433 masm.jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string 434 masm.cmpl(cnt1, cnt2); 435 masm.jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 436 masm.addq(result, 16); 437 438 masm.bind(adjustStr); 439 masm.cmpl(cnt1, 8); // Do not read beyond string 440 masm.jccb(ConditionFlag.GreaterEqual, scanToSubstr); 441 // Back-up string to avoid reading beyond string. 442 masm.leaq(result, new AMD64Address(result, cnt1, Scale.Times2, -16)); 443 masm.movl(cnt1, 8); 444 masm.jmpb(scanToSubstr); 445 446 // Found a potential substr 447 masm.bind(foundCandidate); 448 // After pcmpestri tmp(rcx) contains matched element index 449 450 // Make sure string is still long enough 451 masm.subl(cnt1, tmp); 452 masm.cmpl(cnt1, cnt2); 453 masm.jccb(ConditionFlag.GreaterEqual, foundSubstr); 454 // Left less then substring. 455 456 masm.bind(retNotFound); 457 masm.movl(result, -1); 458 masm.jmpb(cleanup); 459 460 masm.bind(foundSubstr); 461 // Compute start addr of substr 462 masm.leaq(result, new AMD64Address(result, tmp, Scale.Times2)); 463 464 if (intCnt2 > 0) { // Constant substring 465 // Repeat search for small substring (< 8 chars) 466 // from new point without reloading substring. 467 // Have to check that we don't read beyond string. 468 masm.cmpl(tmp, 8 - intCnt2); 469 masm.jccb(ConditionFlag.Greater, adjustStr); 470 // Fall through if matched whole substring. 471 } else { // non constant 472 assert intCnt2 == -1 : "should be != 0"; 473 masm.addl(tmp, cnt2); 474 // Found result if we matched whole substring. 475 masm.cmpl(tmp, 8); 476 masm.jccb(ConditionFlag.LessEqual, retFound); 477 478 // Repeat search for small substring (<= 8 chars) 479 // from new point 'str1' without reloading substring. 480 masm.cmpl(cnt2, 8); 481 // Have to check that we don't read beyond string. 482 masm.jccb(ConditionFlag.LessEqual, adjustStr); 483 484 Label checkNext = new Label(); 485 Label contScanSubstr = new Label(); 486 Label retFoundLong = new Label(); 487 // Compare the rest of substring (> 8 chars). 488 masm.movq(charPtr1, result); 489 490 masm.cmpl(tmp, cnt2); 491 // First 8 chars are already matched. 492 masm.jccb(ConditionFlag.Equal, checkNext); 493 494 masm.bind(scanSubstr); 495 masm.pcmpestri(vec, new AMD64Address(charPtr1, 0), 0x0d); 496 // Need to reload strings pointers if not matched whole vector 497 masm.jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 498 499 masm.bind(checkNext); 500 masm.subl(cnt2, 8); 501 masm.jccb(ConditionFlag.LessEqual, retFoundLong); // Found full substring 502 masm.addq(charPtr1, 16); 503 masm.addq(charPtr2, 16); 504 masm.subl(cnt1, 8); 505 masm.cmpl(cnt2, 8); // Do not read beyond substring 506 masm.jccb(ConditionFlag.GreaterEqual, contScanSubstr); 507 // Back-up strings to avoid reading beyond substring. 508 masm.leaq(charPtr2, new AMD64Address(charPtr2, cnt2, Scale.Times2, -16)); 509 masm.leaq(charPtr1, new AMD64Address(charPtr1, cnt2, Scale.Times2, -16)); 510 masm.subl(cnt1, cnt2); 511 masm.movl(cnt2, 8); 512 masm.addl(cnt1, 8); 513 masm.bind(contScanSubstr); 514 masm.movdqu(vec, new AMD64Address(charPtr2, 0)); 515 masm.jmpb(scanSubstr); 516 517 masm.bind(retFoundLong); 518 masm.movq(charPtr1, new AMD64Address(rsp, wordSize)); 519 } // non constant 520 521 masm.bind(retFound); 522 // Compute substr offset 523 masm.subq(result, charPtr1); 524 masm.shrl(result, 1); // index 525 526 masm.bind(cleanup); 527 masm.pop(rsp); // restore SP 528 } 529 530 }