src/cpu/x86/vm/macroAssembler_x86.cpp
*** old/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Nov 20 16:02:53 2015
--- new/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Nov 20 16:02:53 2015

*** 6973,6983 ****
--- 6973,6984 ----
                                        Register cnt1, Register cnt2,
                                        int int_cnt2,  Register result,
                                        XMMRegister vec, Register tmp,
                                        int ae) {
    ShortBranchVerifier sbv(this);
!   assert(UseSSE42Intrinsics, "SSE4.2 is required");
!   assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+   assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
    assert(ae != StrIntrinsicNode::LU, "Invalid encoding");

    // This method uses the pcmpestri instruction with bound registers
    //   inputs:
    //     xmm - substring
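The same two-line guard recurs in every hunk below: the reworded UseSSE42Intrinsics assert plus a new UseSSE >= 4 assert. A minimal standalone sketch of the pattern, assuming hypothetical stand-ins for the VM flags (the real declarations live in HotSpot's globals, and HotSpot's own assert macro takes a separate message argument):

#include <cassert>

static int  UseSSE             = 4;     // stand-in for -XX:UseSSE=n
static bool UseSSE42Intrinsics = true;  // stand-in for -XX:+UseSSE42Intrinsics

static void emit_sse42_stub() {
  // The intrinsics flag alone does not guarantee the instruction set level:
  // pcmpestri needs SSE >= 4, so both conditions are asserted up front.
  assert(UseSSE42Intrinsics && "SSE4.2 intrinsics are required");
  assert(UseSSE >= 4 && "SSE4 must be enabled for SSE4.2 intrinsics to be available");
  // ... SSE4.2 code generation would follow here ...
}

int main() {
  emit_sse42_stub();
  return 0;
}
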
*** 7151,7161 ****
--- 7152,7163 ----
                                      Register cnt1, Register cnt2,
                                      int int_cnt2, Register result,
                                      XMMRegister vec, Register tmp,
                                      int ae) {
    ShortBranchVerifier sbv(this);
!   assert(UseSSE42Intrinsics, "SSE4.2 is required");
!   assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+   assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
    assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
    //
    // int_cnt2 is length of small (< 8 chars) constant substring
    // or (-1) for non constant substring in which case its length
*** 7468,7478 ****
--- 7470,7481 ----
  } // string_indexof

  void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
    ShortBranchVerifier sbv(this);
!   assert(UseSSE42Intrinsics, "SSE4.2 is required");
!   assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+   assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");

    int stride = 8;

    Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP, SCAN_TO_8_CHAR,
          SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
*** 7505,7521 ****
--- 7508,7522 ----
      movdl(vec1, ch);
      pshuflw(vec1, vec1, 0x00);
      pshufd(vec1, vec1, 0);
      pxor(vec2, vec2);
    }
    if (UseAVX >= 2 || UseSSE42Intrinsics) {
      bind(SCAN_TO_8_CHAR);
      cmpl(cnt1, stride);
      if (UseAVX >= 2) {
        jccb(Assembler::less, SCAN_TO_CHAR);
      }
      if (!(UseAVX >= 2)) {
+     } else {
        jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
        movdl(vec1, ch);
        pshuflw(vec1, vec1, 0x00);
        pshufd(vec1, vec1, 0);
        pxor(vec2, vec2);
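Besides the new asserts, this hunk folds a re-test of the negated condition into a plain else branch. A compilable sketch of the before/after shape, with UseAVX as a hypothetical stand-in for the VM flag and toy bodies in place of the emitted instructions:

static int UseAVX = 2;  // stand-in for -XX:UseAVX=n

static int shape_before(int x) {
  int r = 0;
  if (UseAVX >= 2)    { r = x + 1; }
  if (!(UseAVX >= 2)) { r = x - 1; }  // same condition tested twice
  return r;
}

static int shape_after(int x) {
  int r = 0;
  if (UseAVX >= 2) { r = x + 1; }
  else             { r = x - 1; }     // identical behavior, single test
  return r;
}

int main() {
  return shape_before(1) == shape_after(1) ? 0 : 1;
}
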
*** 7530,7544 ****
--- 7531,7543 ----
      ptest(vec2, vec3);
      jcc(Assembler::carryClear, FOUND_CHAR);
      addptr(result, 16);
      subl(tmp, stride);
      jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
    }
    bind(SCAN_TO_CHAR);
    testl(cnt1, cnt1);
    jcc(Assembler::zero, RET_NOT_FOUND);
    bind(SCAN_TO_CHAR_LOOP);
    load_unsigned_short(tmp, Address(result, 0));
    cmpl(ch, tmp);
    jccb(Assembler::equal, FOUND_SEQ_CHAR);
    addptr(result, 2);
*** 7548,7567 ****
--- 7547,7564 ----
    bind(RET_NOT_FOUND);
    movl(result, -1);
    jmpb(DONE_LABEL);

    if (UseAVX >= 2 || UseSSE42Intrinsics) {
      bind(FOUND_CHAR);
      if (UseAVX >= 2) {
        vpmovmskb(tmp, vec3);
      } else {
        pmovmskb(tmp, vec3);
      }
      bsfl(ch, tmp);
      addl(result, ch);
    }

    bind(FOUND_SEQ_CHAR);
    subptr(result, str1);
    shrl(result, 1);
*** 7646,7655 ****
--- 7643,7653 ----
      scale2 = Address::times_2;
      stride = 8;
    }

    if (UseAVX >= 2 && UseSSE42Intrinsics) {
+     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
      Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
      Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
      Label COMPARE_TAIL_LONG;
      int pcmpmask = 0x19;
      if (ae == StrIntrinsicNode::LL) {
*** 7781,7790 ****
--- 7779,7789 ----
      negptr(cnt2);
      jmpb(WHILE_HEAD_LABEL);

      bind(COMPARE_SMALL_STR);
    } else if (UseSSE42Intrinsics) {
+     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
      Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
      int pcmpmask = 0x19;
      // Setup to compare 8-char (16-byte) vectors,
      // start from first character again because it has aligned address.
      movl(result, cnt2);
*** 7913,7923 ****
--- 7912,7923 ----
    testl(len, len);
    jcc(Assembler::zero, FALSE_LABEL);
    movl(result, len); // copy

!   if (UseAVX >= 2 && UseSSE42Intrinsics) {
+     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
      // With AVX2, use 32-byte vector compare
      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

      // Compare 32-byte vectors
      andl(result, 0x0000001f);   //   tail count (in bytes)
*** 7948,7957 ****
--- 7948,7958 ----
      bind(COMPARE_TAIL); // len is zero
      movl(len, result);
      // Fallthru to tail compare
    } else if (UseSSE42Intrinsics) {
+     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
      // With SSE4.2, use double quad vector compare
      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
      // Compare 16-byte vectors
      andl(result, 0x0000000f);   //   tail count (in bytes)
*** 8024,8034 ****
--- 8025,8035 ----
    bind(FALSE_LABEL);
    xorl(result, result); // return false

    // That's it
    bind(DONE);
!   if (UseAVX >= 2 && UseSSE42Intrinsics) {
      // clean upper bits of YMM registers
      vpxor(vec1, vec1);
      vpxor(vec2, vec2);
    }
  }
*** 8112,8121 ****
--- 8113,8123 ----
      bind(COMPARE_TAIL); // limit is zero
      movl(limit, result);
      // Fallthru to tail compare
    } else if (UseSSE42Intrinsics) {
+     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
      // With SSE4.2, use double quad vector compare
      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
      // Compare 16-byte vectors
      andl(result, 0x0000000f);   //   tail count (in bytes)
*** 8461,8470 ****
--- 8463,8473 ----
    lea(src, Address(src, len, Address::times_2)); // char[]
    lea(dst, Address(dst, len, Address::times_1)); // byte[]
    negptr(len);

    if (UseSSE42Intrinsics || UseAVX >= 2) {
+     assert(UseSSE42Intrinsics ? UseSSE >= 4 : true, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
      Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
      Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;

      if (UseAVX >= 2) {
        Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
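This hunk guards a block reachable under either SSE4.2 or AVX2, so the added assert is conditional: the ternary only demands UseSSE >= 4 when the SSE4.2 path can actually be taken, leaving a pure AVX2 configuration exempt. A standalone sketch of that implication, using a hypothetical helper rather than anything from MacroAssembler:

#include <cassert>

// Hypothetical helper; "cond ? req : true" encodes the implication
// use_sse42_intrinsics ==> (use_sse >= 4).
static void check_sse42_guard(bool use_sse42_intrinsics, int use_sse) {
  assert((use_sse42_intrinsics ? use_sse >= 4 : true) &&
         "SSE4 must be enabled for SSE4.2 intrinsics to be available");
}

int main() {
  check_sse42_guard(true, 4);   // SSE4.2 path: level must be >= 4
  check_sse42_guard(false, 2);  // AVX2-only path: exempt from the requirement
  return 0;
}
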
*** 10233,10242 ****
--- 10236,10246 ----
    // save length for return
    push(len);

    if (UseSSE42Intrinsics) {
+     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
      Label copy_32_loop, copy_16, copy_tail;

      movl(result, len);
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
*** 10332,10341 ****
--- 10336,10346 ----
    // rdi holds start addr of destination char[]
    // rdx holds length
    assert_different_registers(src, dst, len, tmp2);

    if (UseSSE42Intrinsics) {
+     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
      Label copy_8_loop, copy_bytes, copy_tail;

      movl(tmp2, len);
      andl(tmp2, 0x00000007);   // tail count (in chars)
      andl(len, 0xfffffff8);    // vector count (in chars)
