--- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-11-20 16:02:53.720052340 +0100 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-11-20 16:02:53.652052343 +0100 @@ -6975,7 +6975,8 @@ XMMRegister vec, Register tmp, int ae) { ShortBranchVerifier sbv(this); - assert(UseSSE42Intrinsics, "SSE4.2 is required"); + assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); // This method uses the pcmpestri instruction with bound registers @@ -7153,7 +7154,8 @@ XMMRegister vec, Register tmp, int ae) { ShortBranchVerifier sbv(this); - assert(UseSSE42Intrinsics, "SSE4.2 is required"); + assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); // @@ -7470,7 +7472,8 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result, XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { ShortBranchVerifier sbv(this); - assert(UseSSE42Intrinsics, "SSE4.2 is required"); + assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); int stride = 8; @@ -7507,36 +7510,32 @@ pshufd(vec1, vec1, 0); pxor(vec2, vec2); } - if (UseAVX >= 2 || UseSSE42Intrinsics) { - bind(SCAN_TO_8_CHAR); - cmpl(cnt1, stride); - if (UseAVX >= 2) { - jccb(Assembler::less, SCAN_TO_CHAR); - } - if (!(UseAVX >= 2)) { - jccb(Assembler::less, SCAN_TO_CHAR_LOOP); - movdl(vec1, ch); - pshuflw(vec1, vec1, 0x00); - pshufd(vec1, vec1, 0); - pxor(vec2, vec2); - } - movl(tmp, cnt1); - andl(tmp, 0xFFFFFFF8); //vector count (in chars) - andl(cnt1,0x00000007); //tail count (in chars) - - bind(SCAN_TO_8_CHAR_LOOP); - movdqu(vec3, Address(result, 0)); - pcmpeqw(vec3, vec1); - ptest(vec2, vec3); - 
jcc(Assembler::carryClear, FOUND_CHAR); - addptr(result, 16); - subl(tmp, stride); - jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP); + bind(SCAN_TO_8_CHAR); + cmpl(cnt1, stride); + if (UseAVX >= 2) { + jccb(Assembler::less, SCAN_TO_CHAR); + } else { + jccb(Assembler::less, SCAN_TO_CHAR_LOOP); + movdl(vec1, ch); + pshuflw(vec1, vec1, 0x00); + pshufd(vec1, vec1, 0); + pxor(vec2, vec2); } + movl(tmp, cnt1); + andl(tmp, 0xFFFFFFF8); //vector count (in chars) + andl(cnt1,0x00000007); //tail count (in chars) + + bind(SCAN_TO_8_CHAR_LOOP); + movdqu(vec3, Address(result, 0)); + pcmpeqw(vec3, vec1); + ptest(vec2, vec3); + jcc(Assembler::carryClear, FOUND_CHAR); + addptr(result, 16); + subl(tmp, stride); + jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP); bind(SCAN_TO_CHAR); testl(cnt1, cnt1); jcc(Assembler::zero, RET_NOT_FOUND); - bind(SCAN_TO_CHAR_LOOP); load_unsigned_short(tmp, Address(result, 0)); cmpl(ch, tmp); @@ -7550,16 +7549,14 @@ movl(result, -1); jmpb(DONE_LABEL); - if (UseAVX >= 2 || UseSSE42Intrinsics) { - bind(FOUND_CHAR); - if (UseAVX >= 2) { - vpmovmskb(tmp, vec3); - } else { - pmovmskb(tmp, vec3); - } - bsfl(ch, tmp); - addl(result, ch); + bind(FOUND_CHAR); + if (UseAVX >= 2) { + vpmovmskb(tmp, vec3); + } else { + pmovmskb(tmp, vec3); } + bsfl(ch, tmp); + addl(result, ch); bind(FOUND_SEQ_CHAR); subptr(result, str1); @@ -7648,6 +7645,7 @@ } if (UseAVX >= 2 && UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; Label COMPARE_TAIL_LONG; @@ -7783,6 +7781,7 @@ bind(COMPARE_SMALL_STR); } else if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; int pcmpmask = 0x19; // Setup to compare 8-char (16-byte) vectors, @@ -7915,7 +7914,8 @@ movl(result, 
len); // copy - if (UseAVX >= 2) { + if (UseAVX >= 2 && UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); // With AVX2, use 32-byte vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; @@ -7950,6 +7950,7 @@ movl(len, result); // Fallthru to tail compare } else if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); // With SSE4.2, use double quad vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; @@ -8026,7 +8027,7 @@ // That's it bind(DONE); - if (UseAVX >= 2) { + if (UseAVX >= 2 && UseSSE42Intrinsics) { // clean upper bits of YMM registers vpxor(vec1, vec1); vpxor(vec2, vec2); @@ -8114,6 +8115,7 @@ movl(limit, result); // Fallthru to tail compare } else if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); // With SSE4.2, use double quad vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; @@ -8463,6 +8465,7 @@ negptr(len); if (UseSSE42Intrinsics || UseAVX >= 2) { + assert(UseSSE42Intrinsics ? 
UseSSE >= 4 : true, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit; Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit; @@ -10235,6 +10238,7 @@ push(len); if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label copy_32_loop, copy_16, copy_tail; movl(result, len); @@ -10334,6 +10338,7 @@ assert_different_registers(src, dst, len, tmp2); if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label copy_8_loop, copy_bytes, copy_tail; movl(tmp2, len); --- old/src/cpu/x86/vm/vm_version_x86.cpp 2015-11-20 16:02:53.728052340 +0100 +++ new/src/cpu/x86/vm/vm_version_x86.cpp 2015-11-20 16:02:53.656052343 +0100 @@ -930,10 +930,15 @@ UseXmmI2D = false; } } - if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) { - if( supports_sse4_2() && UseSSE >= 4 ) { - UseSSE42Intrinsics = true; + if (supports_sse4_2() && UseSSE >= 4) { + if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); + } + } else { + if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); } + FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); } // some defaults for AMD family 15h @@ -1007,8 +1012,13 @@ } if (supports_sse4_2() && UseSSE >= 4) { if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { - UseSSE42Intrinsics = true; + FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); } + } else { + if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); } } if ((cpu_family() == 0x06) &&