< prev index next >

src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page

        

*** 9427,9436 **** --- 9427,9437 ---- } void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){ assert(UseSSE42Intrinsics, "SSE4.2 must be enabled."); + Label VECTOR64_LOOP, VECTOR64_TAIL, VECTOR64_NOT_EQUAL, VECTOR32_TAIL; Label VECTOR32_LOOP, VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP; Label VECTOR16_TAIL, VECTOR8_TAIL, VECTOR4_TAIL; Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL; Label SAME_TILL_END, DONE; Label BYTES_LOOP, BYTES_TAIL, BYTES_NOT_EQUAL;
*** 9439,9453 **** ShortBranchVerifier sbv(this); shlq(length); xorq(result, result); cmpq(length, 8); jcc(Assembler::equal, VECTOR8_LOOP); jcc(Assembler::less, VECTOR4_TAIL); ! if (UseAVX >= 2){ cmpq(length, 16); jcc(Assembler::equal, VECTOR16_LOOP); jcc(Assembler::less, VECTOR8_LOOP); --- 9440,9505 ---- ShortBranchVerifier sbv(this); shlq(length); xorq(result, result); + if ((UseAVX > 2) && + VM_Version::supports_avx512vlbw()) { + set_programmed_mask_reg(); // opening of the stub context for programming mask registers + cmpq(length, 64); + jcc(Assembler::less, VECTOR32_TAIL); + movq(tmp1, length); + andq(tmp1, 0x3F); // tail count + andq(length, ~(0x3F)); //vector count + + bind(VECTOR64_LOOP); + // AVX512 code to compare 64 byte vectors. + evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit); + evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit); + kortestql(k7, k7); + jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch + addq(result, 64); + subq(length, 64); + jccb(Assembler::notZero, VECTOR64_LOOP); + + //bind(VECTOR64_TAIL); + testq(tmp1, tmp1); + jcc(Assembler::zero, SAME_TILL_END); + + bind(VECTOR64_TAIL); + // AVX512 code to compare upto 63 byte vectors. + // Save k1 + kmovql(k3, k1); + mov64(tmp2, 0xFFFFFFFFFFFFFFFF); + shlxq(tmp2, tmp2, tmp1); + notq(tmp2); + kmovql(k1, tmp2); + + evmovdqub(k1, false, rymm0, Address(obja, result), Assembler::AVX_512bit); + evpcmpeqb(k1, false, k7, rymm0, Address(objb, result), Assembler::AVX_512bit); + + ktestql(k7, k1); + // Restore k1 + kmovql(k1, k3); + jcc(Assembler::below, SAME_TILL_END); // not mismatch + + bind(VECTOR64_NOT_EQUAL); + kmovql(tmp1, k7); + notq(tmp1); + tzcntq(tmp1, tmp1); + addq(result, tmp1); + shrq(result); + jmp(DONE); + bind(VECTOR32_TAIL); + clear_programmed_mask_reg(); // closing of the stub context for programming mask registers + } + cmpq(length, 8); jcc(Assembler::equal, VECTOR8_LOOP); jcc(Assembler::less, VECTOR4_TAIL); ! if (UseAVX >= 2) { cmpq(length, 16); jcc(Assembler::equal, VECTOR16_LOOP); jcc(Assembler::less, VECTOR8_LOOP);
*** 9551,9561 **** xorl(tmp1, tmp2); testl(tmp1, tmp1); jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found jmpb(SAME_TILL_END); ! if (UseAVX >= 2){ bind(VECTOR32_NOT_EQUAL); vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit); vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit); vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit); vpmovmskb(tmp1, rymm0); --- 9603,9613 ---- xorl(tmp1, tmp2); testl(tmp1, tmp1); jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found jmpb(SAME_TILL_END); ! if (UseAVX >= 2) { bind(VECTOR32_NOT_EQUAL); vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit); vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit); vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit); vpmovmskb(tmp1, rymm0);
*** 9564,9574 **** shrq(result); jmpb(DONE); } bind(VECTOR16_NOT_EQUAL); ! if (UseAVX >= 2){ vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit); vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit); pxor(rymm0, rymm2); } else { pcmpeqb(rymm2, rymm2); --- 9616,9626 ---- shrq(result); jmpb(DONE); } bind(VECTOR16_NOT_EQUAL); ! if (UseAVX >= 2) { vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit); vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit); pxor(rymm0, rymm2); } else { pcmpeqb(rymm2, rymm2);
*** 9595,9605 **** mov64(result, -1); bind(DONE); } - //Helper functions for square_to_len() /** * Store the squares of x[], right shifted one bit (divided by 2) into z[] * Preserves x and z and modifies rest of the registers. --- 9647,9656 ----
< prev index next >