< prev index next >
src/cpu/x86/vm/macroAssembler_x86.cpp
Print this page
@@ -44,10 +44,13 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
#include "crc32c.h"
+#ifdef COMPILER2
+#include "opto/intrinsicnode.hpp"
+#endif
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
@@ -6297,51 +6300,68 @@
NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
rep_stos();
}
}
+#ifdef COMPILER2
+
// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
- XMMRegister vec, Register tmp) {
+ XMMRegister vec, Register tmp,
+ int ae) {
ShortBranchVerifier sbv(this);
assert(UseSSE42Intrinsics, "SSE4.2 is required");
+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
- // This method uses pcmpestri instruction with bound registers
+ // This method uses the pcmpestri instruction with bound registers
// inputs:
// xmm - substring
// rax - substring length (elements count)
// mem - scanned string
// rdx - string length (elements count)
// 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+ // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
// outputs:
// rcx - matched index in string
assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+ int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
+ int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
+ Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
+ Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
// Note, inline_string_indexOf() generates checks:
// if (substr.count > string.count) return -1;
// if (substr.count == 0) return 0;
- assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
+ assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars");
// Load substring.
+ if (ae == StrIntrinsicNode::UL) {
+ pmovzxbw(vec, Address(str2, 0));
+ } else {
movdqu(vec, Address(str2, 0));
+ }
movl(cnt2, int_cnt2);
movptr(result, str1); // string addr
- if (int_cnt2 > 8) {
+ if (int_cnt2 > stride) {
jmpb(SCAN_TO_SUBSTR);
// Reload substr for rescan, this code
// is executed only for large substrings (> 8 chars)
bind(RELOAD_SUBSTR);
+ if (ae == StrIntrinsicNode::UL) {
+ pmovzxbw(vec, Address(str2, 0));
+ } else {
movdqu(vec, Address(str2, 0));
+ }
negptr(cnt2); // Jumped here with negative cnt2, convert to positive
bind(RELOAD_STR);
// We came here after the beginning of the substring was
// matched but the rest of it was not so we need to search
@@ -6356,131 +6376,150 @@
decrementl(cnt1); // Shift to next element
cmpl(cnt1, cnt2);
jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
- addptr(result, 2);
+ addptr(result, (1<<scale1));
} // (int_cnt2 > 8)
// Scan string for start of substr in 16-byte vectors
bind(SCAN_TO_SUBSTR);
- pcmpestri(vec, Address(result, 0), 0x0d);
+ pcmpestri(vec, Address(result, 0), mode);
jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
- subl(cnt1, 8);
+ subl(cnt1, stride);
jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
cmpl(cnt1, cnt2);
jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
addptr(result, 16);
jmpb(SCAN_TO_SUBSTR);
// Found a potential substr
bind(FOUND_CANDIDATE);
// Matched whole vector if first element matched (tmp(rcx) == 0).
- if (int_cnt2 == 8) {
+ if (int_cnt2 == stride) {
jccb(Assembler::overflow, RET_FOUND); // OF == 1
} else { // int_cnt2 > 8
jccb(Assembler::overflow, FOUND_SUBSTR);
}
// After pcmpestri tmp(rcx) contains matched element index
// Compute start addr of substr
- lea(result, Address(result, tmp, Address::times_2));
+ lea(result, Address(result, tmp, scale1));
// Make sure string is still long enough
subl(cnt1, tmp);
cmpl(cnt1, cnt2);
- if (int_cnt2 == 8) {
+ if (int_cnt2 == stride) {
jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
} else { // int_cnt2 > 8
jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
}
// Left less then substring.
bind(RET_NOT_FOUND);
movl(result, -1);
jmpb(EXIT);
- if (int_cnt2 > 8) {
+ if (int_cnt2 > stride) {
// This code is optimized for the case when whole substring
// is matched if its head is matched.
bind(MATCH_SUBSTR_HEAD);
- pcmpestri(vec, Address(result, 0), 0x0d);
+ pcmpestri(vec, Address(result, 0), mode);
// Reload only string if does not match
jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
Label CONT_SCAN_SUBSTR;
// Compare the rest of substring (> 8 chars).
bind(FOUND_SUBSTR);
// First 8 chars are already matched.
negptr(cnt2);
- addptr(cnt2, 8);
+ addptr(cnt2, stride);
bind(SCAN_SUBSTR);
- subl(cnt1, 8);
- cmpl(cnt2, -8); // Do not read beyond substring
+ subl(cnt1, stride);
+ cmpl(cnt2, -stride); // Do not read beyond substring
jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
// Back-up strings to avoid reading beyond substring:
// cnt1 = cnt1 - cnt2 + 8
addl(cnt1, cnt2); // cnt2 is negative
- addl(cnt1, 8);
- movl(cnt2, 8); negptr(cnt2);
+ addl(cnt1, stride);
+ movl(cnt2, stride); negptr(cnt2);
bind(CONT_SCAN_SUBSTR);
if (int_cnt2 < (int)G) {
- movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
- pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
+ int tail_off1 = int_cnt2<<scale1;
+ int tail_off2 = int_cnt2<<scale2;
+ if (ae == StrIntrinsicNode::UL) {
+ pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2));
+ } else {
+ movdqu(vec, Address(str2, cnt2, scale2, tail_off2));
+ }
+ pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode);
} else {
// calculate index in register to avoid integer overflow (int_cnt2*2)
movl(tmp, int_cnt2);
addptr(tmp, cnt2);
- movdqu(vec, Address(str2, tmp, Address::times_2, 0));
- pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
+ if (ae == StrIntrinsicNode::UL) {
+ pmovzxbw(vec, Address(str2, tmp, scale2, 0));
+ } else {
+ movdqu(vec, Address(str2, tmp, scale2, 0));
+ }
+ pcmpestri(vec, Address(result, tmp, scale1, 0), mode);
}
// Need to reload strings pointers if not matched whole vector
jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
- addptr(cnt2, 8);
+ addptr(cnt2, stride);
jcc(Assembler::negative, SCAN_SUBSTR);
// Fall through if found full substring
} // (int_cnt2 > 8)
bind(RET_FOUND);
// Found result if we matched full small substring.
// Compute substr offset
subptr(result, str1);
+ if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
shrl(result, 1); // index
+ }
bind(EXIT);
} // string_indexofC8
// Small strings are loaded through stack if they cross page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
- XMMRegister vec, Register tmp) {
+ XMMRegister vec, Register tmp,
+ int ae) {
ShortBranchVerifier sbv(this);
assert(UseSSE42Intrinsics, "SSE4.2 is required");
+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+
//
// int_cnt2 is length of small (< 8 chars) constant substring
// or (-1) for non constant substring in which case its length
// is in cnt2 register.
//
// Note, inline_string_indexOf() generates checks:
// if (substr.count > string.count) return -1;
// if (substr.count == 0) return 0;
//
- assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
-
- // This method uses pcmpestri instruction with bound registers
+ int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
+ assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0");
+ // This method uses the pcmpestri instruction with bound registers
// inputs:
// xmm - substring
// rax - substring length (elements count)
// mem - scanned string
// rdx - string length (elements count)
// 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+ // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
// outputs:
// rcx - matched index in string
assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+ int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
+ Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
+ Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
FOUND_CANDIDATE;
@@ -6490,27 +6529,44 @@
Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
movptr(tmp, rsp); // save old SP
if (int_cnt2 > 0) { // small (< 8 chars) constant substring
- if (int_cnt2 == 1) { // One char
+ if (int_cnt2 == (1>>scale2)) { // One byte
+ assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding");
+ load_unsigned_byte(result, Address(str2, 0));
+ movdl(vec, result); // move 32 bits
+ } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) { // Three bytes
+ // Not enough header space in 32-bit VM: 12+3 = 15.
+ movl(result, Address(str2, -1));
+ shrl(result, 8);
+ movdl(vec, result); // move 32 bits
+ } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) { // One char
load_unsigned_short(result, Address(str2, 0));
movdl(vec, result); // move 32 bits
- } else if (int_cnt2 == 2) { // Two chars
+ } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars
movdl(vec, Address(str2, 0)); // move 32 bits
- } else if (int_cnt2 == 4) { // Four chars
+ } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars
movq(vec, Address(str2, 0)); // move 64 bits
- } else { // cnt2 = { 3, 5, 6, 7 }
+ } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 ={2, ..., 7})
// Array header size is 12 bytes in 32-bit VM
// + 6 bytes for 3 chars == 18 bytes,
// enough space to load vec and shift.
assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
- movdqu(vec, Address(str2, (int_cnt2*2)-16));
- psrldq(vec, 16-(int_cnt2*2));
+ if (ae == StrIntrinsicNode::UL) {
+ int tail_off = int_cnt2-8;
+ pmovzxbw(vec, Address(str2, tail_off));
+ psrldq(vec, -2*tail_off);
+ }
+ else {
+ int tail_off = int_cnt2*(1<<scale2);
+ movdqu(vec, Address(str2, tail_off-16));
+ psrldq(vec, 16-tail_off);
+ }
}
} else { // not constant substring
- cmpl(cnt2, 8);
+ cmpl(cnt2, stride);
jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
// We can read beyond string if str+16 does not cross page boundary
// since heaps are aligned and mapped by pages.
assert(os::vm_page_size() < (int)G, "default page should be small");
@@ -6519,44 +6575,54 @@
cmpl(result, (os::vm_page_size()-16));
jccb(Assembler::belowEqual, CHECK_STR);
// Move small strings to stack to allow load 16 bytes into vec.
subptr(rsp, 16);
- int stk_offset = wordSize-2;
+ int stk_offset = wordSize-(1<<scale2);
push(cnt2);
bind(COPY_SUBSTR);
- load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
- movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) {
+ load_unsigned_byte(result, Address(str2, cnt2, scale2, -1));
+ movb(Address(rsp, cnt2, scale2, stk_offset), result);
+ } else if (ae == StrIntrinsicNode::UU) {
+ load_unsigned_short(result, Address(str2, cnt2, scale2, -2));
+ movw(Address(rsp, cnt2, scale2, stk_offset), result);
+ }
decrement(cnt2);
jccb(Assembler::notZero, COPY_SUBSTR);
pop(cnt2);
movptr(str2, rsp); // New substring address
} // non constant
bind(CHECK_STR);
- cmpl(cnt1, 8);
+ cmpl(cnt1, stride);
jccb(Assembler::aboveEqual, BIG_STRINGS);
// Check cross page boundary.
movl(result, str1); // We need only low 32 bits
andl(result, (os::vm_page_size()-1));
cmpl(result, (os::vm_page_size()-16));
jccb(Assembler::belowEqual, BIG_STRINGS);
subptr(rsp, 16);
- int stk_offset = -2;
+ int stk_offset = -(1<<scale1);
if (int_cnt2 < 0) { // not constant
push(cnt2);
stk_offset += wordSize;
}
movl(cnt2, cnt1);
bind(COPY_STR);
- load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
- movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+ if (ae == StrIntrinsicNode::LL) {
+ load_unsigned_byte(result, Address(str1, cnt2, scale1, -1));
+ movb(Address(rsp, cnt2, scale1, stk_offset), result);
+ } else {
+ load_unsigned_short(result, Address(str1, cnt2, scale1, -2));
+ movw(Address(rsp, cnt2, scale1, stk_offset), result);
+ }
decrement(cnt2);
jccb(Assembler::notZero, COPY_STR);
if (int_cnt2 < 0) { // not constant
pop(cnt2);
@@ -6564,11 +6630,15 @@
movptr(str1, rsp); // New string address
bind(BIG_STRINGS);
// Load substring.
if (int_cnt2 < 0) { // -1
+ if (ae == StrIntrinsicNode::UL) {
+ pmovzxbw(vec, Address(str2, 0));
+ } else {
movdqu(vec, Address(str2, 0));
+ }
push(cnt2); // substr count
push(str2); // substr addr
push(str1); // string addr
} else {
// Small (< 8 chars) constant substrings are loaded already.
@@ -6595,41 +6665,47 @@
// Reload substr for rescan, this code
// is executed only for large substrings (> 8 chars)
bind(RELOAD_SUBSTR);
movptr(str2, Address(rsp, 2*wordSize));
movl(cnt2, Address(rsp, 3*wordSize));
+ if (ae == StrIntrinsicNode::UL) {
+ pmovzxbw(vec, Address(str2, 0));
+ } else {
movdqu(vec, Address(str2, 0));
+ }
// We came here after the beginning of the substring was
// matched but the rest of it was not so we need to search
// again. Start from the next element after the previous match.
subptr(str1, result); // Restore counter
+ if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
shrl(str1, 1);
+ }
addl(cnt1, str1);
decrementl(cnt1); // Shift to next element
cmpl(cnt1, cnt2);
jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
- addptr(result, 2);
+ addptr(result, (1<<scale1));
} // non constant
// Scan string for start of substr in 16-byte vectors
bind(SCAN_TO_SUBSTR);
assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
- pcmpestri(vec, Address(result, 0), 0x0d);
+ pcmpestri(vec, Address(result, 0), mode);
jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
- subl(cnt1, 8);
+ subl(cnt1, stride);
jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
cmpl(cnt1, cnt2);
jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
addptr(result, 16);
bind(ADJUST_STR);
- cmpl(cnt1, 8); // Do not read beyond string
+ cmpl(cnt1, stride); // Do not read beyond string
jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
// Back-up string to avoid reading beyond string.
- lea(result, Address(result, cnt1, Address::times_2, -16));
- movl(cnt1, 8);
+ lea(result, Address(result, cnt1, scale1, -16));
+ movl(cnt1, stride);
jmpb(SCAN_TO_SUBSTR);
// Found a potential substr
bind(FOUND_CANDIDATE);
// After pcmpestri tmp(rcx) contains matched element index
@@ -6644,30 +6720,29 @@
movl(result, -1);
jmpb(CLEANUP);
bind(FOUND_SUBSTR);
// Compute start addr of substr
- lea(result, Address(result, tmp, Address::times_2));
-
+ lea(result, Address(result, tmp, scale1));
if (int_cnt2 > 0) { // Constant substring
// Repeat search for small substring (< 8 chars)
// from new point without reloading substring.
// Have to check that we don't read beyond string.
- cmpl(tmp, 8-int_cnt2);
+ cmpl(tmp, stride-int_cnt2);
jccb(Assembler::greater, ADJUST_STR);
// Fall through if matched whole substring.
} else { // non constant
assert(int_cnt2 == -1, "should be != 0");
addl(tmp, cnt2);
// Found result if we matched whole substring.
- cmpl(tmp, 8);
+ cmpl(tmp, stride);
jccb(Assembler::lessEqual, RET_FOUND);
// Repeat search for small substring (<= 8 chars)
// from new point 'str1' without reloading substring.
- cmpl(cnt2, 8);
+ cmpl(cnt2, stride);
// Have to check that we don't read beyond string.
jccb(Assembler::lessEqual, ADJUST_STR);
Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
// Compare the rest of substring (> 8 chars).
@@ -6676,53 +6751,190 @@
cmpl(tmp, cnt2);
// First 8 chars are already matched.
jccb(Assembler::equal, CHECK_NEXT);
bind(SCAN_SUBSTR);
- pcmpestri(vec, Address(str1, 0), 0x0d);
+ pcmpestri(vec, Address(str1, 0), mode);
// Need to reload strings pointers if not matched whole vector
jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
bind(CHECK_NEXT);
- subl(cnt2, 8);
+ subl(cnt2, stride);
jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
addptr(str1, 16);
+ if (ae == StrIntrinsicNode::UL) {
+ addptr(str2, 8);
+ } else {
addptr(str2, 16);
- subl(cnt1, 8);
- cmpl(cnt2, 8); // Do not read beyond substring
+ }
+ subl(cnt1, stride);
+ cmpl(cnt2, stride); // Do not read beyond substring
jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
// Back-up strings to avoid reading beyond substring.
- lea(str2, Address(str2, cnt2, Address::times_2, -16));
- lea(str1, Address(str1, cnt2, Address::times_2, -16));
+
+ if (ae == StrIntrinsicNode::UL) {
+ lea(str2, Address(str2, cnt2, scale2, -8));
+ lea(str1, Address(str1, cnt2, scale1, -16));
+ } else {
+ lea(str2, Address(str2, cnt2, scale2, -16));
+ lea(str1, Address(str1, cnt2, scale1, -16));
+ }
subl(cnt1, cnt2);
- movl(cnt2, 8);
- addl(cnt1, 8);
+ movl(cnt2, stride);
+ addl(cnt1, stride);
bind(CONT_SCAN_SUBSTR);
+ if (ae == StrIntrinsicNode::UL) {
+ pmovzxbw(vec, Address(str2, 0));
+ } else {
movdqu(vec, Address(str2, 0));
+ }
jmpb(SCAN_SUBSTR);
bind(RET_FOUND_LONG);
movptr(str1, Address(rsp, wordSize));
} // non constant
bind(RET_FOUND);
// Compute substr offset
subptr(result, str1);
+ if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
shrl(result, 1); // index
-
+ }
bind(CLEANUP);
pop(rsp); // restore SP
} // string_indexof
-// Compare strings.
+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
+ ShortBranchVerifier sbv(this);
+ assert(UseSSE42Intrinsics, "SSE4.2 is required");
+ // Emits code that scans the 16-bit elements starting at str1 (cnt1 = element count) for the value in ch.
+ int stride = 8; // number of 16-bit elements covered by one 16-byte XMM vector
+ // On exit, result holds the element index of the first match or -1; cnt1, ch, tmp and vec1..vec3 are overwritten.
+ Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
+ SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
+ RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
+ FOUND_SEQ_CHAR, DONE_LABEL;
+ // result is used as the moving scan pointer; it is turned back into an index at FOUND_SEQ_CHAR.
+ movptr(result, str1);
+ if (UseAVX >= 2) {
+ cmpl(cnt1, stride);
+ jccb(Assembler::less, SCAN_TO_CHAR_LOOP); // fewer than 8 elements: scalar loop only. NOTE(review): no zero check on this path, presumably the caller guarantees cnt1 > 0 - confirm
+ cmpl(cnt1, 2*stride);
+ jccb(Assembler::less, SCAN_TO_8_CHAR_INIT); // 8..15 elements: skip the 32-byte loop, go set up the 16-byte one
+ movdl(vec1, ch);
+ vpbroadcastw(vec1, vec1); // replicate the searched 16-bit value into every word of vec1
+ vpxor(vec2, vec2); // vec2 = 0, used as the first operand of the (v)ptest checks below
+ movl(tmp, cnt1);
+ andl(tmp, 0xFFFFFFF0); // vector count (in chars): cnt1 rounded down to a multiple of 16
+ andl(cnt1,0x0000000F); // tail count (in chars): what remains after the 32-byte loop
+ // Main AVX2 loop: 16 elements (32 bytes) per iteration.
+ bind(SCAN_TO_16_CHAR_LOOP);
+ vmovdqu(vec3, Address(result, 0));
+ vpcmpeqw(vec3, vec3, vec1, true); // per-word equality mask; NOTE(review): trailing 'true' presumably selects the 256-bit form - confirm against the assembler
+ vptest(vec2, vec3); // with vec2 == 0, CF is set only when vec3 is all zero (no word matched)
+ jcc(Assembler::carryClear, FOUND_CHAR); // CF == 0: at least one word in this vector matched
+ addptr(result, 32); // advance by one 32-byte vector
+ subl(tmp, 2*stride);
+ jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
+ jmp(SCAN_TO_8_CHAR); // vec1/vec2 are already set up, so bypass SCAN_TO_8_CHAR_INIT
+ bind(SCAN_TO_8_CHAR_INIT);
+ movdl(vec1, ch);
+ pshuflw(vec1, vec1, 0x00); // copy word 0 into the four low words
+ pshufd(vec1, vec1, 0); // copy dword 0 into all four dwords: ch is now in all 8 words
+ pxor(vec2, vec2); // vec2 = 0 for ptest
+ }
+ if (UseAVX >= 2 || UseSSE42Intrinsics) { // always true here: the assert above requires UseSSE42Intrinsics
+ bind(SCAN_TO_8_CHAR);
+ cmpl(cnt1, stride);
+ if (UseAVX >= 2) {
+ jccb(Assembler::less, SCAN_TO_CHAR); // remaining tail may be empty, so go through the zero check
+ }
+ if (!(UseAVX >= 2)) {
+ jccb(Assembler::less, SCAN_TO_CHAR_LOOP); // NOTE(review): enters the scalar loop without a zero check, as in the AVX2 entry path - confirm cnt1 > 0
+ movdl(vec1, ch);
+ pshuflw(vec1, vec1, 0x00); // same broadcast sequence as at SCAN_TO_8_CHAR_INIT
+ pshufd(vec1, vec1, 0);
+ pxor(vec2, vec2);
+ }
+ movl(tmp, cnt1);
+ andl(tmp, 0xFFFFFFF8); // vector count (in chars): cnt1 rounded down to a multiple of 8
+ andl(cnt1,0x00000007); // tail count (in chars): what remains for the scalar loop
+ // 16-byte loop: 8 elements per iteration.
+ bind(SCAN_TO_8_CHAR_LOOP);
+ movdqu(vec3, Address(result, 0));
+ pcmpeqw(vec3, vec1); // per-word equality mask in vec3
+ ptest(vec2, vec3); // same CF convention as the vptest above
+ jcc(Assembler::carryClear, FOUND_CHAR);
+ addptr(result, 16); // advance by one 16-byte vector
+ subl(tmp, stride);
+ jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
+ }
+ bind(SCAN_TO_CHAR);
+ testl(cnt1, cnt1);
+ jcc(Assembler::zero, RET_NOT_FOUND); // no tail elements left and nothing matched
+ // Scalar tail loop: one 16-bit element per iteration, cnt1 counts the elements left.
+ bind(SCAN_TO_CHAR_LOOP);
+ load_unsigned_short(tmp, Address(result, 0));
+ cmpl(ch, tmp);
+ jccb(Assembler::equal, FOUND_SEQ_CHAR); // result already points at the matching element
+ addptr(result, 2); // next 16-bit element
+ subl(cnt1, 1);
+ jccb(Assembler::zero, RET_NOT_FOUND);
+ jmp(SCAN_TO_CHAR_LOOP);
+ // Not-found exit: result = -1.
+ bind(RET_NOT_FOUND);
+ movl(result, -1);
+ jmpb(DONE_LABEL);
+ // Vector match: locate the first matching element inside the vector that is still in vec3.
+ if (UseAVX >= 2 || UseSSE42Intrinsics) { // always true here, see the assert at the top
+ bind(FOUND_CHAR);
+ if (UseAVX >= 2) {
+ vpmovmskb(tmp, vec3); // one mask bit per byte of the compare result
+ } else {
+ pmovmskb(tmp, vec3);
+ }
+ bsfl(ch, tmp); // lowest set bit = byte offset of the first match; ch is overwritten
+ addl(result, ch); // NOTE(review): 32-bit add on the scan pointer; only the low 32 bits matter once shrl below truncates - confirm this is intended
+ }
+ // Convert the matching address into an element index.
+ bind(FOUND_SEQ_CHAR);
+ subptr(result, str1); // byte offset from the start of the string
+ shrl(result, 1); // bytes -> 16-bit element index
+ // Common exit for both the found and the not-found path.
+ bind(DONE_LABEL);
+} // string_indexof_char
+
+// Helper for string_compare: emits the loads of one element from each string at 'index' into elem1 (from str1) and elem2 (from str2); element width is chosen by 'ae'.
+void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
+ Address::ScaleFactor scale, Address::ScaleFactor scale1,
+ Address::ScaleFactor scale2, Register index, int ae) {
+ if (ae == StrIntrinsicNode::LL) { // both strings read as bytes, shared 'scale'
+ load_unsigned_byte(elem1, Address(str1, index, scale, 0));
+ load_unsigned_byte(elem2, Address(str2, index, scale, 0));
+ } else if (ae == StrIntrinsicNode::UU) { // both strings read as 16-bit elements, shared 'scale'
+ load_unsigned_short(elem1, Address(str1, index, scale, 0));
+ load_unsigned_short(elem2, Address(str2, index, scale, 0));
+ } else { // any other ae (string_compare uses this for the mixed LU/UL case): 'scale' is unused, each string has its own scale
+ load_unsigned_byte(elem1, Address(str1, index, scale1, 0)); // str1 is read as bytes
+ load_unsigned_short(elem2, Address(str2, index, scale2, 0)); // str2 is read as 16-bit elements
+ }
+}
+
+// Compare strings, used for char[] and byte[].
void MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
- XMMRegister vec1) {
+ XMMRegister vec1, int ae) {
ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
+ int stride, stride2, adr_stride, adr_stride1, adr_stride2;
+ Address::ScaleFactor scale, scale1, scale2;
+ if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+ shrl(cnt2, 1);
+ }
// Compute the minimum of the string lengths and the
// difference of the string lengths (stack).
// Do the conditional move stuff
movl(result, cnt1);
subl(cnt1, cnt2);
@@ -6730,74 +6942,127 @@
cmov32(Assembler::lessEqual, cnt2, result);
// Is the minimum length zero?
testl(cnt2, cnt2);
jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
- // Compare first characters
+ if (ae == StrIntrinsicNode::LL) {
+ // Load first bytes
+ load_unsigned_byte(result, Address(str1, 0));
+ load_unsigned_byte(cnt1, Address(str2, 0));
+ } else if (ae == StrIntrinsicNode::UU) {
+ // Load first characters
load_unsigned_short(result, Address(str1, 0));
load_unsigned_short(cnt1, Address(str2, 0));
+ } else {
+ load_unsigned_byte(result, Address(str1, 0));
+ load_unsigned_short(cnt1, Address(str2, 0));
+ }
subl(result, cnt1);
jcc(Assembler::notZero, POP_LABEL);
+
+ if (ae == StrIntrinsicNode::UU) {
+ // Divide length by 2 to get number of chars
+ shrl(cnt2, 1);
+ }
cmpl(cnt2, 1);
jcc(Assembler::equal, LENGTH_DIFF_LABEL);
- // Check if the strings start at the same location.
+ // Check if the strings start at the same location and setup scale and stride
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
cmpptr(str1, str2);
jcc(Assembler::equal, LENGTH_DIFF_LABEL);
-
- Address::ScaleFactor scale = Address::times_2;
- int stride = 8;
+ if (ae == StrIntrinsicNode::LL) {
+ scale = Address::times_1;
+ stride = 16;
+ } else {
+ scale = Address::times_2;
+ stride = 8;
+ }
+ } else {
+ scale1 = Address::times_1;
+ scale2 = Address::times_2;
+ stride = 8;
+ }
if (UseAVX >= 2 && UseSSE42Intrinsics) {
Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
Label COMPARE_TAIL_LONG;
int pcmpmask = 0x19;
+ if (ae == StrIntrinsicNode::LL) {
+ pcmpmask &= ~0x01;
+ }
// Setup to compare 16-chars (32-bytes) vectors,
// start from first character again because it has aligned address.
- int stride2 = 16;
- int adr_stride = stride << scale;
+ if (ae == StrIntrinsicNode::LL) {
+ stride2 = 32;
+ } else {
+ stride2 = 16;
+ }
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ adr_stride = stride << scale;
+ } else {
+ adr_stride1 = 8; //stride << scale1;
+ adr_stride2 = 16; //stride << scale2;
+ }
assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
// rax and rdx are used by pcmpestri as elements counters
movl(result, cnt2);
andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
jcc(Assembler::zero, COMPARE_TAIL_LONG);
// fast path : compare first 2 8-char vectors.
bind(COMPARE_16_CHARS);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
movdqu(vec1, Address(str1, 0));
+ } else {
+ pmovzxbw(vec1, Address(str1, 0));
+ }
pcmpestri(vec1, Address(str2, 0), pcmpmask);
jccb(Assembler::below, COMPARE_INDEX_CHAR);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
movdqu(vec1, Address(str1, adr_stride));
pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
+ } else {
+ pmovzxbw(vec1, Address(str1, adr_stride1));
+ pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask);
+ }
jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
addl(cnt1, stride);
// Compare the characters at index in cnt1
- bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character
- load_unsigned_short(result, Address(str1, cnt1, scale));
- load_unsigned_short(cnt2, Address(str2, cnt1, scale));
+ bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
+ load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
subl(result, cnt2);
jmp(POP_LABEL);
// Setup the registers to start vector comparison loop
bind(COMPARE_WIDE_VECTORS);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
lea(str1, Address(str1, result, scale));
lea(str2, Address(str2, result, scale));
+ } else {
+ lea(str1, Address(str1, result, scale1));
+ lea(str2, Address(str2, result, scale2));
+ }
subl(result, stride2);
subl(cnt2, stride2);
jccb(Assembler::zero, COMPARE_WIDE_TAIL);
negptr(result);
// In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
bind(COMPARE_WIDE_VECTORS_LOOP);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
vmovdqu(vec1, Address(str1, result, scale));
vpxor(vec1, Address(str2, result, scale));
+ } else {
+ vpmovzxbw(vec1, Address(str1, result, scale1));
+ vpxor(vec1, Address(str2, result, scale2));
+ }
vptest(vec1, vec1);
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(result, stride2);
subl(cnt2, stride2);
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
@@ -6816,27 +7081,41 @@
// Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
bind(VECTOR_NOT_EQUAL);
// clean upper bits of YMM registers
vpxor(vec1, vec1);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
lea(str1, Address(str1, result, scale));
lea(str2, Address(str2, result, scale));
+ } else {
+ lea(str1, Address(str1, result, scale1));
+ lea(str2, Address(str2, result, scale2));
+ }
jmp(COMPARE_16_CHARS);
// Compare tail chars, length between 1 to 15 chars
bind(COMPARE_TAIL_LONG);
movl(cnt2, result);
cmpl(cnt2, stride);
jccb(Assembler::less, COMPARE_SMALL_STR);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
movdqu(vec1, Address(str1, 0));
+ } else {
+ pmovzxbw(vec1, Address(str1, 0));
+ }
pcmpestri(vec1, Address(str2, 0), pcmpmask);
jcc(Assembler::below, COMPARE_INDEX_CHAR);
subptr(cnt2, stride);
jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
lea(str1, Address(str1, result, scale));
lea(str2, Address(str2, result, scale));
+ } else {
+ lea(str1, Address(str1, result, scale1));
+ lea(str2, Address(str2, result, scale2));
+ }
negptr(cnt2);
jmpb(WHILE_HEAD_LABEL);
bind(COMPARE_SMALL_STR);
} else if (UseSSE42Intrinsics) {
@@ -6844,14 +7123,21 @@
int pcmpmask = 0x19;
// Setup to compare 8-char (16-byte) vectors,
// start from first character again because it has aligned address.
movl(result, cnt2);
andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
+ if (ae == StrIntrinsicNode::LL) {
+ pcmpmask &= ~0x01;
+ }
jccb(Assembler::zero, COMPARE_TAIL);
-
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
lea(str1, Address(str1, result, scale));
lea(str2, Address(str2, result, scale));
+ } else {
+ lea(str1, Address(str1, result, scale1));
+ lea(str2, Address(str2, result, scale2));
+ }
negptr(result);
// pcmpestri
// inputs:
// vec1- substring
@@ -6863,12 +7149,17 @@
// outputs:
// rcx - first mismatched element index
assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
bind(COMPARE_WIDE_VECTORS);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
movdqu(vec1, Address(str1, result, scale));
pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+ } else {
+ pmovzxbw(vec1, Address(str1, result, scale1));
+ pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
+ }
// After pcmpestri cnt1(rcx) contains mismatched element index
jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
addptr(result, stride);
subptr(cnt2, stride);
@@ -6879,69 +7170,222 @@
jccb(Assembler::zero, LENGTH_DIFF_LABEL);
movl(cnt2, stride);
movl(result, stride);
negptr(result);
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
movdqu(vec1, Address(str1, result, scale));
pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+ } else {
+ pmovzxbw(vec1, Address(str1, result, scale1));
+ pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
+ }
jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
// Mismatched characters in the vectors
bind(VECTOR_NOT_EQUAL);
addptr(cnt1, result);
- load_unsigned_short(result, Address(str1, cnt1, scale));
- load_unsigned_short(cnt2, Address(str2, cnt1, scale));
+ load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
subl(result, cnt2);
jmpb(POP_LABEL);
bind(COMPARE_TAIL); // limit is zero
movl(cnt2, result);
// Fallthru to tail compare
}
// Shift str2 and str1 to the end of the arrays, negate min
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
lea(str1, Address(str1, cnt2, scale));
lea(str2, Address(str2, cnt2, scale));
+ } else {
+ lea(str1, Address(str1, cnt2, scale1));
+ lea(str2, Address(str2, cnt2, scale2));
+ }
decrementl(cnt2); // first character was compared already
negptr(cnt2);
// Compare the rest of the elements
bind(WHILE_HEAD_LABEL);
- load_unsigned_short(result, Address(str1, cnt2, scale, 0));
- load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
+ load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae);
subl(result, cnt1);
jccb(Assembler::notZero, POP_LABEL);
increment(cnt2);
jccb(Assembler::notZero, WHILE_HEAD_LABEL);
// Strings are equal up to min length. Return the length difference.
bind(LENGTH_DIFF_LABEL);
pop(result);
+ if (ae == StrIntrinsicNode::UU) {
+ // Divide diff by 2 to get number of chars
+ sarl(result, 1);
+ }
jmpb(DONE_LABEL);
// Discard the stored length difference
bind(POP_LABEL);
pop(cnt1);
// That's it
bind(DONE_LABEL);
+ if(ae == StrIntrinsicNode::UL) {
+ negl(result);
+ }
}
-// Compare char[] arrays aligned to 4 bytes or substrings.
-void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
- Register limit, Register result, Register chr,
+// Emit code that searches a byte array for a non-ASCII character (negative byte value, i.e. bit 0x80 set);
+// result is set to 1 (true) if any such byte exists and to 0 (false) otherwise. ary1, len, tmp1, vec1 and vec2 are overwritten.
+void MacroAssembler::has_negatives(Register ary1, Register len,
+ Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2) {
+
+ // rsi: byte array (register named by the original author; presumably fixed by the caller - confirm there)
+ // rcx: len (byte count; consumed while scanning)
+ // rax: result (holds a copy of the length / tail count during the scan, then the 0/1 answer)
+ ShortBranchVerifier sbv(this);
+ assert_different_registers(ary1, len, result, tmp1);
+ assert_different_registers(vec1, vec2);
+ Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
+
+ // len == 0: an empty array contains no negative bytes
+ testl(len, len);
+ jcc(Assembler::zero, FALSE_LABEL);
+
+ movl(result, len); // copy; its low bits size the tail once len has been consumed
+
+ if (UseAVX >= 2) {
+ // With AVX2, use 32-byte vector compare
+ Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+ // Compare 32-byte vectors
+ andl(result, 0x0000001f); // tail count (in bytes)
+ andl(len, 0xffffffe0); // vector count (in bytes)
+ jccb(Assembler::zero, COMPARE_TAIL); // fewer than 32 bytes: no wide vector to test
+
+ lea(ary1, Address(ary1, len, Address::times_1)); // ary1 = end of the vector part
+ negptr(len); // len becomes a negative offset that counts up to zero
+
+ movl(tmp1, 0x80808080); // mask selecting the sign bit of each byte
+ movdl(vec2, tmp1);
+ vpbroadcastd(vec2, vec2); // replicate the mask dword into every lane of vec2
+
+ bind(COMPARE_WIDE_VECTORS);
+ vmovdqu(vec1, Address(ary1, len, Address::times_1));
+ vptest(vec1, vec2); // ZF is cleared when vec1 & vec2 != 0, i.e. some byte is negative
+ jccb(Assembler::notZero, TRUE_LABEL);
+ addptr(len, 32);
+ jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+ testl(result, result); // any tail bytes left?
+ jccb(Assembler::zero, FALSE_LABEL);
+
+ vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); // last 32 bytes of the array; overlaps bytes already tested
+ vptest(vec1, vec2);
+ jccb(Assembler::notZero, TRUE_LABEL);
+ jmpb(FALSE_LABEL);
+
+ bind(COMPARE_TAIL); // len is zero
+ movl(len, result); // restore the (sub-vector) byte count
+ // Fallthru to tail compare
+ } else if (UseSSE42Intrinsics) {
+ // With SSE4.2, use double quad vector compare
+ Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+ // Compare 16-byte vectors
+ andl(result, 0x0000000f); // tail count (in bytes)
+ andl(len, 0xfffffff0); // vector count (in bytes)
+ jccb(Assembler::zero, COMPARE_TAIL); // fewer than 16 bytes: no wide vector to test
+
+ lea(ary1, Address(ary1, len, Address::times_1)); // ary1 = end of the vector part
+ negptr(len); // len becomes a negative offset that counts up to zero
+
+ movl(tmp1, 0x80808080); // mask selecting the sign bit of each byte
+ movdl(vec2, tmp1);
+ pshufd(vec2, vec2, 0); // replicate dword 0 of the mask into all four dwords
+
+ bind(COMPARE_WIDE_VECTORS);
+ movdqu(vec1, Address(ary1, len, Address::times_1));
+ ptest(vec1, vec2); // ZF is cleared when vec1 & vec2 != 0, i.e. some byte is negative
+ jccb(Assembler::notZero, TRUE_LABEL);
+ addptr(len, 16);
+ jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+ testl(result, result); // any tail bytes left?
+ jccb(Assembler::zero, FALSE_LABEL);
+
+ movdqu(vec1, Address(ary1, result, Address::times_1, -16)); // last 16 bytes of the array; overlaps bytes already tested
+ ptest(vec1, vec2);
+ jccb(Assembler::notZero, TRUE_LABEL);
+ jmpb(FALSE_LABEL);
+
+ bind(COMPARE_TAIL); // len is zero
+ movl(len, result); // restore the (sub-vector) byte count
+ // Fallthru to tail compare
+ }
+
+ // Compare 4-byte vectors (scalar path: the whole array without SSE4.2/AVX2, else a short tail)
+ andl(len, 0xfffffffc); // vector count (in bytes)
+ jccb(Assembler::zero, COMPARE_CHAR);
+
+ lea(ary1, Address(ary1, len, Address::times_1)); // ary1 = end of the dword part
+ negptr(len);
+
+ bind(COMPARE_VECTORS);
+ movl(tmp1, Address(ary1, len, Address::times_1));
+ andl(tmp1, 0x80808080); // nonzero when any of the four bytes is negative
+ jccb(Assembler::notZero, TRUE_LABEL);
+ addptr(len, 4);
+ jcc(Assembler::notZero, COMPARE_VECTORS);
+
+ // Compare trailing char (final 2 bytes), if any
+ bind(COMPARE_CHAR);
+ testl(result, 0x2); // tail char
+ jccb(Assembler::zero, COMPARE_BYTE);
+ load_unsigned_short(tmp1, Address(ary1, 0));
+ andl(tmp1, 0x00008080); // sign bits of the two bytes
+ jccb(Assembler::notZero, TRUE_LABEL);
+ subptr(result, 2); // bit 0 of result, tested below, is unaffected
+ lea(ary1, Address(ary1, 2)); // step past the two bytes just tested
+
+ bind(COMPARE_BYTE);
+ testl(result, 0x1); // tail byte
+ jccb(Assembler::zero, FALSE_LABEL);
+ load_unsigned_byte(tmp1, Address(ary1, 0));
+ andl(tmp1, 0x00000080); // sign bit of the last byte
+ jccb(Assembler::notEqual, TRUE_LABEL);
+ jmpb(FALSE_LABEL);
+
+ bind(TRUE_LABEL);
+ movl(result, 1); // return true
+ jmpb(DONE);
+
+ bind(FALSE_LABEL);
+ xorl(result, result); // return false
+
+ // That's it
+ bind(DONE);
+ if (UseAVX >= 2) {
+ // clean upper bits of YMM registers
+ vpxor(vec1, vec1);
+ vpxor(vec2, vec2);
+ }
+}
+
+// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
+void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+ Register limit, Register result, Register chr,
+ XMMRegister vec1, XMMRegister vec2, bool is_char) {
ShortBranchVerifier sbv(this);
- Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
+ Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
int length_offset = arrayOopDesc::length_offset_in_bytes();
- int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+ int base_offset = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
+ if (is_array_equ) {
// Check the input args
cmpptr(ary1, ary2);
jcc(Assembler::equal, TRUE_LABEL);
- if (is_array_equ) {
// Need additional checks for arrays_equals.
testptr(ary1, ary1);
jcc(Assembler::zero, FALSE_LABEL);
testptr(ary2, ary2);
jcc(Assembler::zero, FALSE_LABEL);
@@ -6960,19 +7404,22 @@
// Load array address
lea(ary1, Address(ary1, base_offset));
lea(ary2, Address(ary2, base_offset));
}
+ if (is_array_equ && is_char) {
+ // arrays_equals when used for char[].
shll(limit, 1); // byte count != 0
+ }
movl(result, limit); // copy
if (UseAVX >= 2) {
// With AVX2, use 32-byte vector compare
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
// Compare 32-byte vectors
- andl(result, 0x0000001e); // tail count (in bytes)
+ andl(result, 0x0000001f); // tail count (in bytes)
andl(limit, 0xffffffe0); // vector count (in bytes)
jccb(Assembler::zero, COMPARE_TAIL);
lea(ary1, Address(ary1, limit, Address::times_1));
lea(ary2, Address(ary2, limit, Address::times_1));
@@ -7005,11 +7452,11 @@
} else if (UseSSE42Intrinsics) {
// With SSE4.2, use double quad vector compare
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
// Compare 16-byte vectors
- andl(result, 0x0000000e); // tail count (in bytes)
+ andl(result, 0x0000000f); // tail count (in bytes)
andl(limit, 0xfffffff0); // vector count (in bytes)
jccb(Assembler::zero, COMPARE_TAIL);
lea(ary1, Address(ary1, limit, Address::times_1));
lea(ary2, Address(ary2, limit, Address::times_1));
@@ -7057,16 +7504,30 @@
jcc(Assembler::notZero, COMPARE_VECTORS);
// Compare trailing char (final 2 bytes), if any
bind(COMPARE_CHAR);
testl(result, 0x2); // tail char
- jccb(Assembler::zero, TRUE_LABEL);
+ jccb(Assembler::zero, COMPARE_BYTE);
load_unsigned_short(chr, Address(ary1, 0));
load_unsigned_short(limit, Address(ary2, 0));
cmpl(chr, limit);
jccb(Assembler::notEqual, FALSE_LABEL);
+ if (is_array_equ && is_char) {
+ bind(COMPARE_BYTE);
+ } else {
+ lea(ary1, Address(ary1, 2));
+ lea(ary2, Address(ary2, 2));
+
+ bind(COMPARE_BYTE);
+ testl(result, 0x1); // tail byte
+ jccb(Assembler::zero, TRUE_LABEL);
+ load_unsigned_byte(chr, Address(ary1, 0));
+ load_unsigned_byte(limit, Address(ary2, 0));
+ cmpl(chr, limit);
+ jccb(Assembler::notEqual, FALSE_LABEL);
+ }
bind(TRUE_LABEL);
movl(result, 1); // return true
jmpb(DONE);
bind(FALSE_LABEL);
@@ -7079,10 +7540,12 @@
vpxor(vec1, vec1);
vpxor(vec2, vec2);
}
}
+#endif
+
void MacroAssembler::generate_fill(BasicType t, bool aligned,
Register to, Register value, Register count,
Register rtmp, XMMRegister xtmp) {
ShortBranchVerifier sbv(this);
assert_different_registers(to, value, count, rtmp);
@@ -9083,10 +9546,183 @@
#endif // LP64
#undef BIND
#undef BLOCK_COMMENT
+// Compress char[] array to byte[]: emits code that narrows each 16-bit char of src to one byte in dst. result is the original len on success, or 0 as soon as a char with a nonzero high byte is found (dst may already hold earlier chunks). src, dst, len, tmp5 and the XMM temporaries are overwritten.
+void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
+ XMMRegister tmp1Reg, XMMRegister tmp2Reg,
+ XMMRegister tmp3Reg, XMMRegister tmp4Reg,
+ Register tmp5, Register result) {
+ Label copy_chars_loop, return_length, return_zero, done;
+
+ // rsi: src (registers named by the original author; presumably fixed by the caller - confirm there)
+ // rdi: dst
+ // rdx: len
+ // rcx: tmp5
+ // rax: result
+
+ // rsi holds start addr of source char[] to be compressed
+ // rdi holds start addr of destination byte[]
+ // rdx holds length (counted in chars; see the scale factors below)
+
+ assert(len != result, "");
+
+ // save length for return (len and result are both reused as scratch below)
+ push(len);
+
+ if (UseSSE42Intrinsics) {
+ Label copy_32_loop, copy_16, copy_tail;
+
+ movl(result, len);
+ movl(tmp5, 0xff00ff00); // mask selecting the high byte of each 16-bit char (nonzero => does not fit in a byte)
+
+ // vectored compression
+ andl(len, 0xfffffff0); // vector count (in chars)
+ andl(result, 0x0000000f); // tail count (in chars)
+ testl(len, len);
+ jccb(Assembler::zero, copy_16);
+
+ // compress 16 chars per iter
+ movdl(tmp1Reg, tmp5);
+ pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
+ pxor(tmp4Reg, tmp4Reg); // tmp4Reg accumulates the OR of every vector loaded so far
+
+ lea(src, Address(src, len, Address::times_2)); // src/dst = end of the vector part
+ lea(dst, Address(dst, len, Address::times_1));
+ negptr(len); // len becomes a negative char offset that counts up to zero
+
+ bind(copy_32_loop);
+ movdqu(tmp2Reg, Address(src, len, Address::times_2)); // load 1st 8 characters
+ por(tmp4Reg, tmp2Reg);
+ movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
+ por(tmp4Reg, tmp3Reg);
+ ptest(tmp4Reg, tmp1Reg); // check for chars with a nonzero high byte in the vectors seen so far
+ jcc(Assembler::notZero, return_zero);
+ packuswb(tmp2Reg, tmp3Reg); // only Latin-1 chars (high bytes are zero); compress each to 1 byte
+ movdqu(Address(dst, len, Address::times_1), tmp2Reg);
+ addptr(len, 16);
+ jcc(Assembler::notZero, copy_32_loop);
+
+ // compress next vector of 8 chars (if any)
+ bind(copy_16);
+ movl(len, result);
+ andl(len, 0xfffffff8); // vector count (in chars)
+ andl(result, 0x00000007); // tail count (in chars)
+ testl(len, len);
+ jccb(Assembler::zero, copy_tail);
+
+ movdl(tmp1Reg, tmp5);
+ pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
+ pxor(tmp3Reg, tmp3Reg); // zero second pack operand; only the low 8 bytes of the packed result are stored
+
+ movdqu(tmp2Reg, Address(src, 0));
+ ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
+ jccb(Assembler::notZero, return_zero);
+ packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
+ movq(Address(dst, 0), tmp2Reg);
+ addptr(src, 16); // 8 chars consumed
+ addptr(dst, 8); // 8 bytes produced
+
+ bind(copy_tail);
+ movl(len, result); // remaining tail count (in chars)
+ }
+ // compress 1 char per iter
+ testl(len, len);
+ jccb(Assembler::zero, return_length);
+ lea(src, Address(src, len, Address::times_2)); // src/dst = end of the remaining chars
+ lea(dst, Address(dst, len, Address::times_1));
+ negptr(len);
+
+ bind(copy_chars_loop);
+ load_unsigned_short(result, Address(src, len, Address::times_2)); // result serves as scratch here
+ testl(result, 0xff00); // check if Unicode char
+ jccb(Assembler::notZero, return_zero);
+ movb(Address(dst, len, Address::times_1), result); // high byte is zero; compress to 1 byte
+ increment(len);
+ jcc(Assembler::notZero, copy_chars_loop);
+
+ // if compression succeeded, return length
+ bind(return_length);
+ pop(result); // the length pushed on entry
+ jmpb(done);
+
+ // if compression failed, return 0
+ bind(return_zero);
+ xorl(result, result);
+ addptr(rsp, wordSize); // discard the saved length without popping it
+
+ bind(done);
+}
+
+// Inflate byte[] array to char[]: emits code that zero-extends each of the len bytes at src to a 16-bit char at dst. src, dst, len, tmp1 and tmp2 are overwritten.
+void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
+ XMMRegister tmp1, Register tmp2) {
+ Label copy_chars_loop, done;
+
+ // rsi: src (registers named by the original author; presumably fixed by the caller - confirm there)
+ // rdi: dst
+ // rdx: len
+ // rcx: tmp2
+
+ // rsi holds start addr of source byte[] to be inflated
+ // rdi holds start addr of destination char[]
+ // rdx holds length (number of bytes to read == number of chars to write)
+ assert_different_registers(src, dst, len, tmp2);
+
+ if (UseSSE42Intrinsics) {
+ Label copy_8_loop, copy_bytes, copy_tail;
+
+ movl(tmp2, len);
+ andl(tmp2, 0x00000007); // tail count (in chars)
+ andl(len, 0xfffffff8); // vector count (in chars)
+ jccb(Assembler::zero, copy_tail);
+
+ // vectored inflation
+ lea(src, Address(src, len, Address::times_1)); // src/dst = end of the vector part
+ lea(dst, Address(dst, len, Address::times_2));
+ negptr(len); // len becomes a negative element offset that counts up to zero
+
+ // inflate 8 chars per iter
+ bind(copy_8_loop);
+ pmovzxbw(tmp1, Address(src, len, Address::times_1)); // unpack to 8 words
+ movdqu(Address(dst, len, Address::times_2), tmp1);
+ addptr(len, 8);
+ jcc(Assembler::notZero, copy_8_loop);
+
+ bind(copy_tail);
+ movl(len, tmp2); // remaining tail count (fewer than 8)
+
+ cmpl(len, 4);
+ jccb(Assembler::less, copy_bytes);
+
+ movdl(tmp1, Address(src, 0)); // load 4 byte chars
+ pmovzxbw(tmp1, tmp1); // zero-extend them to 4 words in the low 8 bytes
+ movq(Address(dst, 0), tmp1);
+ subptr(len, 4);
+ addptr(src, 4); // 4 bytes consumed
+ addptr(dst, 8); // 4 chars (8 bytes) produced
+
+ bind(copy_bytes);
+ }
+ testl(len, len);
+ jccb(Assembler::zero, done);
+ lea(src, Address(src, len, Address::times_1)); // src/dst = end of the remaining elements
+ lea(dst, Address(dst, len, Address::times_2));
+ negptr(len);
+
+ // inflate 1 char per iter
+ bind(copy_chars_loop);
+ load_unsigned_byte(tmp2, Address(src, len, Address::times_1)); // load byte char
+ movw(Address(dst, len, Address::times_2), tmp2); // inflate byte char to word
+ increment(len);
+ jcc(Assembler::notZero, copy_chars_loop);
+
+ bind(done);
+}
+
+
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
switch (cond) {
// Note some conditions are synonyms for others
case Assembler::zero: return Assembler::notZero;
case Assembler::notZero: return Assembler::zero;
< prev index next >