src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File hotspot Cdiff src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java

src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java

Print this page

        

*** 20,34 **** * or visit www.oracle.com if you need additional information or have any * questions. */ package org.graalvm.compiler.asm.amd64; import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIncDec; import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmLoadAndClearUpper; import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmRegToRegMoveAll; ! import org.graalvm.compiler.asm.NumUtil; import jdk.vm.ci.amd64.AMD64; import jdk.vm.ci.amd64.AMD64Kind; import jdk.vm.ci.code.Register; import jdk.vm.ci.code.TargetDescription; --- 20,40 ---- * or visit www.oracle.com if you need additional information or have any * questions. */ package org.graalvm.compiler.asm.amd64; + import static jdk.vm.ci.amd64.AMD64.rax; + import static jdk.vm.ci.amd64.AMD64.rcx; + import static jdk.vm.ci.amd64.AMD64.rdx; + import static jdk.vm.ci.amd64.AMD64.rsp; import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIncDec; import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmLoadAndClearUpper; import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmRegToRegMoveAll; ! import org.graalvm.compiler.asm.Label; ! import org.graalvm.compiler.core.common.NumUtil; ! import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; import jdk.vm.ci.amd64.AMD64; import jdk.vm.ci.amd64.AMD64Kind; import jdk.vm.ci.code.Register; import jdk.vm.ci.code.TargetDescription;
*** 136,145 **** --- 142,155 ---- public final void cmpptr(Register src1, AMD64Address src2) { cmpq(src1, src2); } + public final void decrementl(Register reg) { + decrementl(reg, 1); + } + public final void decrementl(Register reg, int value) { if (value == Integer.MIN_VALUE) { subl(reg, value); return; }
*** 319,324 **** --- 329,764 ---- assert dest.getRegisterCategory().equals(AMD64.XMM); fstpd(tmp); movdbl(dest, tmp); addq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes()); } + + // IndexOf for constant substrings with size >= 8 chars + // which don't need to be loaded through stack. + public void stringIndexofC8(Register str1, Register str2, + Register cnt1, Register cnt2, + int intCnt2, Register result, + Register vec, Register tmp) { + // assert(UseSSE42Intrinsics, "SSE4.2 is required"); + + // This method uses pcmpestri inxtruction with bound registers + // inputs: + // xmm - substring + // rax - substring length (elements count) + // mem - scanned string + // rdx - string length (elements count) + // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) + // outputs: + // rcx - matched index in string + assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; + + Label reloadSubstr = new Label(); + Label scanToSubstr = new Label(); + Label scanSubstr = new Label(); + Label retFound = new Label(); + Label retNotFound = new Label(); + Label exit = new Label(); + Label foundSubstr = new Label(); + Label matchSubstrHead = new Label(); + Label reloadStr = new Label(); + Label foundCandidate = new Label(); + + // Note, inline_string_indexOf() generates checks: + // if (substr.count > string.count) return -1; + // if (substr.count == 0) return 0; + assert intCnt2 >= 8 : "this code isused only for cnt2 >= 8 chars"; + + // Load substring. + movdqu(vec, new AMD64Address(str2, 0)); + movl(cnt2, intCnt2); + movq(result, str1); // string addr + + if (intCnt2 > 8) { + jmpb(scanToSubstr); + + // Reload substr for rescan, this code + // is executed only for large substrings (> 8 chars) + bind(reloadSubstr); + movdqu(vec, new AMD64Address(str2, 0)); + negq(cnt2); // Jumped here with negative cnt2, convert to positive + + bind(reloadStr); + // We came here after the beginning of the substring was + // matched but the rest of it was not so we need to search + // again. Start from the next element after the previous match. + + // cnt2 is number of substring reminding elements and + // cnt1 is number of string reminding elements when cmp failed. + // Restored cnt1 = cnt1 - cnt2 + int_cnt2 + subl(cnt1, cnt2); + addl(cnt1, intCnt2); + movl(cnt2, intCnt2); // Now restore cnt2 + + decrementl(cnt1, 1); // Shift to next element + cmpl(cnt1, cnt2); + jccb(ConditionFlag.Negative, retNotFound); // Left less then substring + + addq(result, 2); + + } // (int_cnt2 > 8) + + // Scan string for start of substr in 16-byte vectors + bind(scanToSubstr); + pcmpestri(vec, new AMD64Address(result, 0), 0x0d); + jccb(ConditionFlag.Below, foundCandidate); // CF == 1 + subl(cnt1, 8); + jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string + cmpl(cnt1, cnt2); + jccb(ConditionFlag.Negative, retNotFound); // Left less then substring + addq(result, 16); + jmpb(scanToSubstr); + + // Found a potential substr + bind(foundCandidate); + // Matched whole vector if first element matched (tmp(rcx) == 0). + if (intCnt2 == 8) { + jccb(ConditionFlag.Overflow, retFound); // OF == 1 + } else { // int_cnt2 > 8 + jccb(ConditionFlag.Overflow, foundSubstr); + } + // After pcmpestri tmp(rcx) contains matched element index + // Compute start addr of substr + leaq(result, new AMD64Address(result, tmp, Scale.Times2, 0)); + + // Make sure string is still long enough + subl(cnt1, tmp); + cmpl(cnt1, cnt2); + if (intCnt2 == 8) { + jccb(ConditionFlag.GreaterEqual, scanToSubstr); + } else { // int_cnt2 > 8 + jccb(ConditionFlag.GreaterEqual, matchSubstrHead); + } + // Left less then substring. + + bind(retNotFound); + movl(result, -1); + jmpb(exit); + + if (intCnt2 > 8) { + // This code is optimized for the case when whole substring + // is matched if its head is matched. + bind(matchSubstrHead); + pcmpestri(vec, new AMD64Address(result, 0), 0x0d); + // Reload only string if does not match + jccb(ConditionFlag.NoOverflow, reloadStr); // OF == 0 + + Label contScanSubstr = new Label(); + // Compare the rest of substring (> 8 chars). + bind(foundSubstr); + // First 8 chars are already matched. + negq(cnt2); + addq(cnt2, 8); + + bind(scanSubstr); + subl(cnt1, 8); + cmpl(cnt2, -8); // Do not read beyond substring + jccb(ConditionFlag.LessEqual, contScanSubstr); + // Back-up strings to avoid reading beyond substring: + // cnt1 = cnt1 - cnt2 + 8 + addl(cnt1, cnt2); // cnt2 is negative + addl(cnt1, 8); + movl(cnt2, 8); + negq(cnt2); + bind(contScanSubstr); + if (intCnt2 < 1024 * 1024 * 1024) { + movdqu(vec, new AMD64Address(str2, cnt2, Scale.Times2, intCnt2 * 2)); + pcmpestri(vec, new AMD64Address(result, cnt2, Scale.Times2, intCnt2 * 2), 0x0d); + } else { + // calculate index in register to avoid integer overflow (int_cnt2*2) + movl(tmp, intCnt2); + addq(tmp, cnt2); + movdqu(vec, new AMD64Address(str2, tmp, Scale.Times2, 0)); + pcmpestri(vec, new AMD64Address(result, tmp, Scale.Times2, 0), 0x0d); + } + // Need to reload strings pointers if not matched whole vector + jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 + addq(cnt2, 8); + jcc(ConditionFlag.Negative, scanSubstr); + // Fall through if found full substring + + } // (int_cnt2 > 8) + + bind(retFound); + // Found result if we matched full small substring. + // Compute substr offset + subq(result, str1); + shrl(result, 1); // index + bind(exit); + + } // string_indexofC8 + + // Small strings are loaded through stack if they cross page boundary. + public void stringIndexOf(Register str1, Register str2, + Register cnt1, Register cnt2, + int intCnt2, Register result, + Register vec, Register tmp, int vmPageSize) { + // + // int_cnt2 is length of small (< 8 chars) constant substring + // or (-1) for non constant substring in which case its length + // is in cnt2 register. + // + // Note, inline_string_indexOf() generates checks: + // if (substr.count > string.count) return -1; + // if (substr.count == 0) return 0; + // + assert intCnt2 == -1 || (0 < intCnt2 && intCnt2 < 8) : "should be != 0"; + + // This method uses pcmpestri instruction with bound registers + // inputs: + // xmm - substring + // rax - substring length (elements count) + // mem - scanned string + // rdx - string length (elements count) + // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) + // outputs: + // rcx - matched index in string + assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; + + Label reloadSubstr = new Label(); + Label scanToSubstr = new Label(); + Label scanSubstr = new Label(); + Label adjustStr = new Label(); + Label retFound = new Label(); + Label retNotFound = new Label(); + Label cleanup = new Label(); + Label foundSubstr = new Label(); + Label foundCandidate = new Label(); + + int wordSize = 8; + // We don't know where these strings are located + // and we can't read beyond them. Load them through stack. + Label bigStrings = new Label(); + Label checkStr = new Label(); + Label copySubstr = new Label(); + Label copyStr = new Label(); + + movq(tmp, rsp); // save old SP + + if (intCnt2 > 0) { // small (< 8 chars) constant substring + if (intCnt2 == 1) { // One char + movzwl(result, new AMD64Address(str2, 0)); + movdl(vec, result); // move 32 bits + } else if (intCnt2 == 2) { // Two chars + movdl(vec, new AMD64Address(str2, 0)); // move 32 bits + } else if (intCnt2 == 4) { // Four chars + movq(vec, new AMD64Address(str2, 0)); // move 64 bits + } else { // cnt2 = { 3, 5, 6, 7 } + // Array header size is 12 bytes in 32-bit VM + // + 6 bytes for 3 chars == 18 bytes, + // enough space to load vec and shift. + movdqu(vec, new AMD64Address(str2, (intCnt2 * 2) - 16)); + psrldq(vec, 16 - (intCnt2 * 2)); + } + } else { // not constant substring + cmpl(cnt2, 8); + jccb(ConditionFlag.AboveEqual, bigStrings); // Both strings are big enough + + // We can read beyond string if str+16 does not cross page boundary + // since heaps are aligned and mapped by pages. + assert vmPageSize < 1024 * 1024 * 1024 : "default page should be small"; + movl(result, str2); // We need only low 32 bits + andl(result, (vmPageSize - 1)); + cmpl(result, (vmPageSize - 16)); + jccb(ConditionFlag.BelowEqual, checkStr); + + // Move small strings to stack to allow load 16 bytes into vec. + subq(rsp, 16); + int stackOffset = wordSize - 2; + push(cnt2); + + bind(copySubstr); + movzwl(result, new AMD64Address(str2, cnt2, Scale.Times2, -2)); + movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); + decrementl(cnt2, 1); + jccb(ConditionFlag.NotZero, copySubstr); + + pop(cnt2); + movq(str2, rsp); // New substring address + } // non constant + + bind(checkStr); + cmpl(cnt1, 8); + jccb(ConditionFlag.AboveEqual, bigStrings); + + // Check cross page boundary. + movl(result, str1); // We need only low 32 bits + andl(result, (vmPageSize - 1)); + cmpl(result, (vmPageSize - 16)); + jccb(ConditionFlag.BelowEqual, bigStrings); + + subq(rsp, 16); + int stackOffset = -2; + if (intCnt2 < 0) { // not constant + push(cnt2); + stackOffset += wordSize; + } + movl(cnt2, cnt1); + + bind(copyStr); + movzwl(result, new AMD64Address(str1, cnt2, Scale.Times2, -2)); + movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); + decrementl(cnt2, 1); + jccb(ConditionFlag.NotZero, copyStr); + + if (intCnt2 < 0) { // not constant + pop(cnt2); + } + movq(str1, rsp); // New string address + + bind(bigStrings); + // Load substring. + if (intCnt2 < 0) { // -1 + movdqu(vec, new AMD64Address(str2, 0)); + push(cnt2); // substr count + push(str2); // substr addr + push(str1); // string addr + } else { + // Small (< 8 chars) constant substrings are loaded already. + movl(cnt2, intCnt2); + } + push(tmp); // original SP + // Finished loading + + // ======================================================== + // Start search + // + + movq(result, str1); // string addr + + if (intCnt2 < 0) { // Only for non constant substring + jmpb(scanToSubstr); + + // SP saved at sp+0 + // String saved at sp+1*wordSize + // Substr saved at sp+2*wordSize + // Substr count saved at sp+3*wordSize + + // Reload substr for rescan, this code + // is executed only for large substrings (> 8 chars) + bind(reloadSubstr); + movq(str2, new AMD64Address(rsp, 2 * wordSize)); + movl(cnt2, new AMD64Address(rsp, 3 * wordSize)); + movdqu(vec, new AMD64Address(str2, 0)); + // We came here after the beginning of the substring was + // matched but the rest of it was not so we need to search + // again. Start from the next element after the previous match. + subq(str1, result); // Restore counter + shrl(str1, 1); + addl(cnt1, str1); + decrementl(cnt1); // Shift to next element + cmpl(cnt1, cnt2); + jccb(ConditionFlag.Negative, retNotFound); // Left less then substring + + addq(result, 2); + } // non constant + + // Scan string for start of substr in 16-byte vectors + bind(scanToSubstr); + assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; + pcmpestri(vec, new AMD64Address(result, 0), 0x0d); + jccb(ConditionFlag.Below, foundCandidate); // CF == 1 + subl(cnt1, 8); + jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string + cmpl(cnt1, cnt2); + jccb(ConditionFlag.Negative, retNotFound); // Left less then substring + addq(result, 16); + + bind(adjustStr); + cmpl(cnt1, 8); // Do not read beyond string + jccb(ConditionFlag.GreaterEqual, scanToSubstr); + // Back-up string to avoid reading beyond string. + leaq(result, new AMD64Address(result, cnt1, Scale.Times2, -16)); + movl(cnt1, 8); + jmpb(scanToSubstr); + + // Found a potential substr + bind(foundCandidate); + // After pcmpestri tmp(rcx) contains matched element index + + // Make sure string is still long enough + subl(cnt1, tmp); + cmpl(cnt1, cnt2); + jccb(ConditionFlag.GreaterEqual, foundSubstr); + // Left less then substring. + + bind(retNotFound); + movl(result, -1); + jmpb(cleanup); + + bind(foundSubstr); + // Compute start addr of substr + leaq(result, new AMD64Address(result, tmp, Scale.Times2)); + + if (intCnt2 > 0) { // Constant substring + // Repeat search for small substring (< 8 chars) + // from new point without reloading substring. + // Have to check that we don't read beyond string. + cmpl(tmp, 8 - intCnt2); + jccb(ConditionFlag.Greater, adjustStr); + // Fall through if matched whole substring. + } else { // non constant + assert intCnt2 == -1 : "should be != 0"; + + addl(tmp, cnt2); + // Found result if we matched whole substring. + cmpl(tmp, 8); + jccb(ConditionFlag.LessEqual, retFound); + + // Repeat search for small substring (<= 8 chars) + // from new point 'str1' without reloading substring. + cmpl(cnt2, 8); + // Have to check that we don't read beyond string. + jccb(ConditionFlag.LessEqual, adjustStr); + + Label checkNext = new Label(); + Label contScanSubstr = new Label(); + Label retFoundLong = new Label(); + // Compare the rest of substring (> 8 chars). + movq(str1, result); + + cmpl(tmp, cnt2); + // First 8 chars are already matched. + jccb(ConditionFlag.Equal, checkNext); + + bind(scanSubstr); + pcmpestri(vec, new AMD64Address(str1, 0), 0x0d); + // Need to reload strings pointers if not matched whole vector + jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 + + bind(checkNext); + subl(cnt2, 8); + jccb(ConditionFlag.LessEqual, retFoundLong); // Found full substring + addq(str1, 16); + addq(str2, 16); + subl(cnt1, 8); + cmpl(cnt2, 8); // Do not read beyond substring + jccb(ConditionFlag.GreaterEqual, contScanSubstr); + // Back-up strings to avoid reading beyond substring. + leaq(str2, new AMD64Address(str2, cnt2, Scale.Times2, -16)); + leaq(str1, new AMD64Address(str1, cnt2, Scale.Times2, -16)); + subl(cnt1, cnt2); + movl(cnt2, 8); + addl(cnt1, 8); + bind(contScanSubstr); + movdqu(vec, new AMD64Address(str2, 0)); + jmpb(scanSubstr); + + bind(retFoundLong); + movq(str1, new AMD64Address(rsp, wordSize)); + } // non constant + + bind(retFound); + // Compute substr offset + subq(result, str1); + shrl(result, 1); // index + + bind(cleanup); + pop(rsp); // restore SP + + } + }
src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File