src/cpu/x86/vm/macroAssembler_x86.cpp
*** old/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Nov 20 16:02:53 2015
--- new/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Nov 20 16:02:53 2015
*** 6973,6983 ****
--- 6973,6984 ----
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp,
int ae) {
ShortBranchVerifier sbv(this);
! assert(UseSSE42Intrinsics, "SSE4.2 is required");
! assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
// This method uses the pcmpestri instruction with bound registers
// inputs:
// xmm - substring
*** 7151,7161 ****
--- 7152,7163 ----
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp,
int ae) {
ShortBranchVerifier sbv(this);
! assert(UseSSE42Intrinsics, "SSE4.2 is required");
! assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
//
// int_cnt2 is length of small (< 8 chars) constant substring
// or (-1) for non constant substring in which case its length
*** 7468,7478 ****
--- 7470,7481 ----
} // string_indexof
void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
ShortBranchVerifier sbv(this);
! assert(UseSSE42Intrinsics, "SSE4.2 is required");
! assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
int stride = 8;
Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
*** 7505,7521 ****
--- 7508,7522 ----
movdl(vec1, ch);
pshuflw(vec1, vec1, 0x00);
pshufd(vec1, vec1, 0);
pxor(vec2, vec2);
}
if (UseAVX >= 2 || UseSSE42Intrinsics) {
bind(SCAN_TO_8_CHAR);
cmpl(cnt1, stride);
if (UseAVX >= 2) {
jccb(Assembler::less, SCAN_TO_CHAR);
}
if (!(UseAVX >= 2)) {
+ } else {
jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
movdl(vec1, ch);
pshuflw(vec1, vec1, 0x00);
pshufd(vec1, vec1, 0);
pxor(vec2, vec2);
*** 7530,7544 ****
--- 7531,7543 ----
ptest(vec2, vec3);
jcc(Assembler::carryClear, FOUND_CHAR);
addptr(result, 16);
subl(tmp, stride);
jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
}
bind(SCAN_TO_CHAR);
testl(cnt1, cnt1);
jcc(Assembler::zero, RET_NOT_FOUND);
bind(SCAN_TO_CHAR_LOOP);
load_unsigned_short(tmp, Address(result, 0));
cmpl(ch, tmp);
jccb(Assembler::equal, FOUND_SEQ_CHAR);
addptr(result, 2);
*** 7548,7567 ****
--- 7547,7564 ----
bind(RET_NOT_FOUND);
movl(result, -1);
jmpb(DONE_LABEL);
if (UseAVX >= 2 || UseSSE42Intrinsics) {
bind(FOUND_CHAR);
if (UseAVX >= 2) {
vpmovmskb(tmp, vec3);
} else {
pmovmskb(tmp, vec3);
}
bsfl(ch, tmp);
addl(result, ch);
}
bind(FOUND_SEQ_CHAR);
subptr(result, str1);
shrl(result, 1);
*** 7646,7655 ****
--- 7643,7653 ----
scale2 = Address::times_2;
stride = 8;
}
if (UseAVX >= 2 && UseSSE42Intrinsics) {
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
Label COMPARE_TAIL_LONG;
int pcmpmask = 0x19;
if (ae == StrIntrinsicNode::LL) {
*** 7781,7790 ****
--- 7779,7789 ----
negptr(cnt2);
jmpb(WHILE_HEAD_LABEL);
bind(COMPARE_SMALL_STR);
} else if (UseSSE42Intrinsics) {
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
int pcmpmask = 0x19;
// Setup to compare 8-char (16-byte) vectors,
// start from first character again because it has aligned address.
movl(result, cnt2);
*** 7913,7923 ****
--- 7912,7923 ----
testl(len, len);
jcc(Assembler::zero, FALSE_LABEL);
movl(result, len); // copy
! if (UseAVX >= 2 && UseSSE42Intrinsics) {
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
// With AVX2, use 32-byte vector compare
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
// Compare 32-byte vectors
andl(result, 0x0000001f); // tail count (in bytes)
*** 7948,7957 ****
--- 7948,7958 ----
bind(COMPARE_TAIL); // len is zero
movl(len, result);
// Fallthru to tail compare
} else if (UseSSE42Intrinsics) {
+ assert(UseSSE >= 4, "SSE4 must be for SSE4.2 intrinsics to be available");
// With SSE4.2, use double quad vector compare
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
// Compare 16-byte vectors
andl(result, 0x0000000f); // tail count (in bytes)
*** 8024,8034 ****
--- 8025,8035 ----
bind(FALSE_LABEL);
xorl(result, result); // return false
// That's it
bind(DONE);
! if (UseAVX >= 2 && UseSSE42Intrinsics) {
// clean upper bits of YMM registers
vpxor(vec1, vec1);
vpxor(vec2, vec2);
}
}
*** 8112,8121 ****
--- 8113,8123 ----
bind(COMPARE_TAIL); // limit is zero
movl(limit, result);
// Fallthru to tail compare
} else if (UseSSE42Intrinsics) {
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
// With SSE4.2, use double quad vector compare
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
// Compare 16-byte vectors
andl(result, 0x0000000f); // tail count (in bytes)
*** 8461,8470 ****
--- 8463,8473 ----
lea(src, Address(src, len, Address::times_2)); // char[]
lea(dst, Address(dst, len, Address::times_1)); // byte[]
negptr(len);
if (UseSSE42Intrinsics || UseAVX >= 2) {
+ assert(!UseSSE42Intrinsics || UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
if (UseAVX >= 2) {
Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
*** 10233,10242 ****
--- 10236,10246 ----
// save length for return
push(len);
if (UseSSE42Intrinsics) {
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
Label copy_32_loop, copy_16, copy_tail;
movl(result, len);
movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
*** 10332,10341 ****
--- 10336,10346 ----
// rdi holds start addr of destination char[]
// rdx holds length
assert_different_registers(src, dst, len, tmp2);
if (UseSSE42Intrinsics) {
+ assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
Label copy_8_loop, copy_bytes, copy_tail;
movl(tmp2, len);
andl(tmp2, 0x00000007); // tail count (in chars)
andl(len, 0xfffffff8); // vector count (in chars)
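
Not part of the webrev: a minimal, self-contained C++ sketch of the flag-guard pattern this patch adds in front of each SSE4.2 code path. The two globals below are illustrative stand-ins for the real HotSpot UseSSE42Intrinsics and UseSSE flags, and emit_pcmpestri() is a hypothetical placeholder for the SSE4.2 instruction emission performed by the stubs above.

#include <cassert>

// Illustrative stand-ins for the HotSpot flags referenced by the patch.
static bool UseSSE42Intrinsics = true;
static int  UseSSE             = 4;

// Hypothetical placeholder for emitting an SSE4.2 instruction such as pcmpestri.
static void emit_pcmpestri() {}

void generate_string_stub() {
  // The pattern added throughout the patch: check the intrinsic flag and, in
  // addition, that the configured SSE level actually includes SSE4.x, so a
  // stub containing SSE4.2 instructions is never generated for a lower level.
  assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
  assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
  emit_pcmpestri();
}

int main() {
  generate_string_stub();
  return 0;
}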