src/cpu/x86/vm/macroAssembler_x86.cpp

*** 3947,3956 ****
--- 3947,4186 ----
  void MacroAssembler::testl(Register dst, AddressLiteral src) {
    assert(reachable(src), "Address should be reachable");
    testl(dst, as_Address(src));
  }
+ 
+ void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
+   int dst_enc = dst->encoding();
+   int src_enc = src->encoding();
+   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+     Assembler::pcmpeqb(dst, src);
+   } else if ((dst_enc < 16) && (src_enc < 16)) {
+     Assembler::pcmpeqb(dst, src);
+   } else if (src_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+     Assembler::pcmpeqb(xmm0, src);
+     movdqu(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else if (dst_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, src, Assembler::AVX_512bit);
+     Assembler::pcmpeqb(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+     movdqu(xmm0, src);
+     movdqu(xmm1, dst);
+     Assembler::pcmpeqb(xmm1, xmm0);
+     movdqu(dst, xmm1);
+     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
+ void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
+   int dst_enc = dst->encoding();
+   int src_enc = src->encoding();
+   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+     Assembler::pcmpeqw(dst, src);
+   } else if ((dst_enc < 16) && (src_enc < 16)) {
+     Assembler::pcmpeqw(dst, src);
+   } else if (src_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+     Assembler::pcmpeqw(xmm0, src);
+     movdqu(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else if (dst_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, src, Assembler::AVX_512bit);
+     Assembler::pcmpeqw(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+     movdqu(xmm0, src);
+     movdqu(xmm1, dst);
+     Assembler::pcmpeqw(xmm1, xmm0);
+     movdqu(dst, xmm1);
+     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
+ void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
+   int dst_enc = dst->encoding();
+   if (dst_enc < 16) {
+     Assembler::pcmpestri(dst, src, imm8);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+     Assembler::pcmpestri(xmm0, src, imm8);
+     movdqu(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
+ void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
+   int dst_enc = dst->encoding();
+   int src_enc = src->encoding();
+   if ((dst_enc < 16) && (src_enc < 16)) {
+     Assembler::pcmpestri(dst, src, imm8);
+   } else if (src_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+     Assembler::pcmpestri(xmm0, src, imm8);
+     movdqu(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else if (dst_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, src, Assembler::AVX_512bit);
+     Assembler::pcmpestri(dst, xmm0, imm8);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+     movdqu(xmm0, src);
+     movdqu(xmm1, dst);
+     Assembler::pcmpestri(xmm1, xmm0, imm8);
+     movdqu(dst, xmm1);
+     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
+ void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
+   int dst_enc = dst->encoding();
+   int src_enc = src->encoding();
+   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+     Assembler::pmovzxbw(dst, src);
+   } else if ((dst_enc < 16) && (src_enc < 16)) {
+     Assembler::pmovzxbw(dst, src);
+   } else if (src_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+     Assembler::pmovzxbw(xmm0, src);
+     movdqu(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else if (dst_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, src, Assembler::AVX_512bit);
+     Assembler::pmovzxbw(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+     movdqu(xmm0, src);
+     movdqu(xmm1, dst);
+     Assembler::pmovzxbw(xmm1, xmm0);
+     movdqu(dst, xmm1);
+     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
+ void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
+   int dst_enc = dst->encoding();
+   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+     Assembler::pmovzxbw(dst, src);
+   } else if (dst_enc < 16) {
+     Assembler::pmovzxbw(dst, src);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+     Assembler::pmovzxbw(xmm0, src);
+     movdqu(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
+ void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
+   int src_enc = src->encoding();
+   if (src_enc < 16) {
+     Assembler::pmovmskb(dst, src);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, src, Assembler::AVX_512bit);
+     Assembler::pmovmskb(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
+ void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
+   int dst_enc = dst->encoding();
+   int src_enc = src->encoding();
+   if ((dst_enc < 16) && (src_enc < 16)) {
+     Assembler::ptest(dst, src);
+   } else if (src_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+     Assembler::ptest(xmm0, src);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else if (dst_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, src, Assembler::AVX_512bit);
+     Assembler::ptest(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+     movdqu(xmm0, src);
+     movdqu(xmm1, dst);
+     Assembler::ptest(xmm1, xmm0);
+     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
  void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
    if (reachable(src)) {
      Assembler::sqrtsd(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
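Note: the new pcmpeqb/pcmpeqw/pcmpestri/pmovzxbw/pmovmskb/ptest wrappers above all follow one dispatch pattern. On hardware with AVX-512F/VL but without AVX512BW, the legacy SSE encodings cannot name xmm16..xmm31, so any upper-bank operand is routed through xmm0 (and xmm1) after those registers are spilled to the stack with the EVEX-encoded evmovdqul. The following is a minimal standalone model of that branch selection; Strategy, pick_strategy and the main driver are illustration-only names, not HotSpot API, and the real code emits instructions instead of returning an enum.

    #include <cstdio>

    enum class Strategy {
      Direct,        // AVX-only or AVX512BW: the instruction can encode any register
      LowBank,       // both encodings < 16: the legacy SSE encoding works as-is
      SpillXmm0,     // one operand is in xmm16..31: route it through xmm0
      SpillXmm0Xmm1  // both operands are in xmm16..31: route through xmm0 and xmm1
    };

    Strategy pick_strategy(int dst_enc, int src_enc,
                           bool supports_avxonly, bool supports_avx512bw) {
      if (supports_avxonly || supports_avx512bw) return Strategy::Direct;
      if (dst_enc < 16 && src_enc < 16)          return Strategy::LowBank;
      if (dst_enc < 16 || src_enc < 16)          return Strategy::SpillXmm0;
      return Strategy::SpillXmm0Xmm1;            // worst case: two scratch regs
    }

    int main() {
      // AVX-512F/VL without BW (e.g. Knights Landing): upper-bank operands
      // force the spill paths.
      const char* names[] = {"Direct", "LowBank", "SpillXmm0", "SpillXmm0Xmm1"};
      int cases[][2] = {{1, 7}, {1, 20}, {20, 1}, {20, 25}};
      for (auto& c : cases) {
        Strategy s = pick_strategy(c[0], c[1], false, false);
        printf("dst=xmm%-2d src=xmm%-2d -> %s\n", c[0], c[1], names[(int)s]);
      }
      return 0;
    }

The 64-byte stack slots in the wrappers match the full 512-bit width of the spilled registers, so live data in the upper lanes of xmm0/xmm1 survives the detour.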
*** 4254,4437 ****
      Assembler::vpaddw(xmm0, xmm0, src, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
  
! void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    int src_enc = src->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpsubb(dst, nds, src, vector_len);
    } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpsubb(dst, dst, src, vector_len);
    } else if ((dst_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for src
      evmovdqul(nds, src, Assembler::AVX_512bit);
!     Assembler::vpsubb(dst, dst, nds, vector_len);
    } else if ((src_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for dst
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpsubb(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else if (dst_enc < 16) {
      // use nds as scratch for xmm0 to hold src
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpsubb(dst, dst, xmm0, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs are in the upper bank
      subptr(rsp, 64);
      evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm1, src, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
      evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
      addptr(rsp, 64);
    }
  }
  
! void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpsubb(dst, nds, src, vector_len);
    } else if (dst_enc < 16) {
!     Assembler::vpsubb(dst, dst, src, vector_len);
    } else if (nds_enc < 16) {
      // implies dst_enc in upper bank with src as scratch
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpsubb(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs in upper bank
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpsubb(xmm0, xmm0, src, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
  
! void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    int src_enc = src->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpsubw(dst, nds, src, vector_len);
    } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpsubw(dst, dst, src, vector_len);
    } else if ((dst_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for src
      evmovdqul(nds, src, Assembler::AVX_512bit);
!     Assembler::vpsubw(dst, dst, nds, vector_len);
    } else if ((src_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for dst
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpsubw(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else if (dst_enc < 16) {
      // use nds as scratch for xmm0 to hold src
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpsubw(dst, dst, xmm0, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs are in the upper bank
      subptr(rsp, 64);
      evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm1, src, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
      evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
      addptr(rsp, 64);
    }
  }
  
! void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpsubw(dst, nds, src, vector_len);
    } else if (dst_enc < 16) {
!     Assembler::vpsubw(dst, dst, src, vector_len);
    } else if (nds_enc < 16) {
      // implies dst_enc in upper bank with src as scratch
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpsubw(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs in upper bank
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
      Assembler::vpsubw(xmm0, xmm0, src, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
! 
! void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    int src_enc = src->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpmullw(dst, nds, src, vector_len);
    } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpmullw(dst, dst, src, vector_len);
    } else if ((dst_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for src
      evmovdqul(nds, src, Assembler::AVX_512bit);
!     Assembler::vpmullw(dst, dst, nds, vector_len);
    } else if ((src_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for dst
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpmullw(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else if (dst_enc < 16) {
      // use nds as scratch for xmm0 to hold src
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpmullw(dst, dst, xmm0, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs are in the upper bank
      subptr(rsp, 64);
      evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm1, src, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
      evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
      addptr(rsp, 64);
    }
  }
  
! void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpmullw(dst, nds, src, vector_len);
    } else if (dst_enc < 16) {
!     Assembler::vpmullw(dst, dst, src, vector_len);
    } else if (nds_enc < 16) {
      // implies dst_enc in upper bank with src as scratch
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpmullw(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs in upper bank
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpmullw(xmm0, xmm0, src, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
  
  void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
--- 4484,4815 ----
      Assembler::vpaddw(xmm0, xmm0, src, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
  
! void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
!   int dst_enc = dst->encoding();
!   int src_enc = src->encoding();
!   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpbroadcastw(dst, src);
!   } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpbroadcastw(dst, src);
!   } else if (src_enc < 16) {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpbroadcastw(xmm0, src);
!     movdqu(dst, xmm0);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   } else if (dst_enc < 16) {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpbroadcastw(dst, xmm0);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   } else {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
!     movdqu(xmm0, src);
!     movdqu(xmm1, dst);
!     Assembler::vpbroadcastw(xmm1, xmm0);
!     movdqu(dst, xmm1);
!     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   }
! }
! 
! void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    int src_enc = src->encoding();
+   assert(dst_enc == nds_enc, "");
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpcmpeqb(dst, nds, src, vector_len);
    } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpcmpeqb(dst, nds, src, vector_len);
!   } else if (src_enc < 16) {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
!     movdqu(dst, xmm0);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   } else if (dst_enc < 16) {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   } else {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
!     movdqu(xmm0, src);
!     movdqu(xmm1, dst);
!     Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
!     movdqu(dst, xmm1);
!     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   }
! }
! 
! void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
!   int dst_enc = dst->encoding();
!   int nds_enc = nds->encoding();
!   int src_enc = src->encoding();
!   assert(dst_enc == nds_enc, "");
!   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpcmpeqw(dst, nds, src, vector_len);
!   } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpcmpeqw(dst, nds, src, vector_len);
!   } else if (src_enc < 16) {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
!     movdqu(dst, xmm0);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   } else if (dst_enc < 16) {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   } else {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
!     movdqu(xmm0, src);
!     movdqu(xmm1, dst);
!     Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
!     movdqu(dst, xmm1);
!     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   }
! }
! 
! void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
!   int dst_enc = dst->encoding();
!   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpmovzxbw(dst, src, vector_len);
!   } else if (dst_enc < 16) {
!     Assembler::vpmovzxbw(dst, src, vector_len);
!   } else {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpmovzxbw(xmm0, src, vector_len);
!     movdqu(dst, xmm0);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   }
! }
! 
! void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
!   int src_enc = src->encoding();
!   if (src_enc < 16) {
!     Assembler::vpmovmskb(dst, src);
!   } else {
!     subptr(rsp, 64);
!     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
!     evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpmovmskb(dst, xmm0);
!     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
!     addptr(rsp, 64);
!   }
! }
! 
! void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
!   int dst_enc = dst->encoding();
!   int nds_enc = nds->encoding();
!   int src_enc = src->encoding();
!   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpmullw(dst, nds, src, vector_len);
!   } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpmullw(dst, dst, src, vector_len);
    } else if ((dst_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for src
      evmovdqul(nds, src, Assembler::AVX_512bit);
!     Assembler::vpmullw(dst, dst, nds, vector_len);
    } else if ((src_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for dst
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpmullw(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else if (dst_enc < 16) {
      // use nds as scratch for xmm0 to hold src
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpmullw(dst, dst, xmm0, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs are in the upper bank
      subptr(rsp, 64);
      evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm1, src, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
      evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
      addptr(rsp, 64);
    }
  }
  
! void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpmullw(dst, nds, src, vector_len);
    } else if (dst_enc < 16) {
!     Assembler::vpmullw(dst, dst, src, vector_len);
    } else if (nds_enc < 16) {
      // implies dst_enc in upper bank with src as scratch
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpmullw(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs in upper bank
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpmullw(xmm0, xmm0, src, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
  
! void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    int src_enc = src->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpsubb(dst, nds, src, vector_len);
    } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpsubb(dst, dst, src, vector_len);
    } else if ((dst_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for src
      evmovdqul(nds, src, Assembler::AVX_512bit);
!     Assembler::vpsubb(dst, dst, nds, vector_len);
    } else if ((src_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for dst
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpsubb(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else if (dst_enc < 16) {
      // use nds as scratch for xmm0 to hold src
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpsubb(dst, dst, xmm0, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs are in the upper bank
      subptr(rsp, 64);
      evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm1, src, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
      evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
      addptr(rsp, 64);
    }
  }
  
! void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpsubb(dst, nds, src, vector_len);
    } else if (dst_enc < 16) {
!     Assembler::vpsubb(dst, dst, src, vector_len);
    } else if (nds_enc < 16) {
      // implies dst_enc in upper bank with src as scratch
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpsubb(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs in upper bank
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
      Assembler::vpsubb(xmm0, xmm0, src, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
  
! void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    int src_enc = src->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpsubw(dst, nds, src, vector_len);
    } else if ((dst_enc < 16) && (src_enc < 16)) {
!     Assembler::vpsubw(dst, dst, src, vector_len);
    } else if ((dst_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for src
      evmovdqul(nds, src, Assembler::AVX_512bit);
!     Assembler::vpsubw(dst, dst, nds, vector_len);
    } else if ((src_enc < 16) && (nds_enc < 16)) {
      // use nds as scratch for dst
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpsubw(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else if (dst_enc < 16) {
      // use nds as scratch for xmm0 to hold src
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, src, Assembler::AVX_512bit);
!     Assembler::vpsubw(dst, dst, xmm0, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs are in the upper bank
      subptr(rsp, 64);
      evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm1, src, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
      evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
      addptr(rsp, 64);
    }
  }
  
! void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->encoding();
    if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
!     Assembler::vpsubw(dst, nds, src, vector_len);
    } else if (dst_enc < 16) {
!     Assembler::vpsubw(dst, dst, src, vector_len);
    } else if (nds_enc < 16) {
      // implies dst_enc in upper bank with src as scratch
      evmovdqul(nds, dst, Assembler::AVX_512bit);
!     Assembler::vpsubw(nds, nds, src, vector_len);
      evmovdqul(dst, nds, Assembler::AVX_512bit);
    } else {
      // worst case scenario, all regs in upper bank
      evmovdqul(nds, xmm0, Assembler::AVX_512bit);
      evmovdqul(xmm0, dst, Assembler::AVX_512bit);
!     Assembler::vpsubw(xmm0, xmm0, src, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
  
  void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
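Note: for the three-operand AVX forms above (vpsubb/vpsubw/vpmullw), the wrappers additionally exploit that on the non-BW path the operation is always emitted as dst = dst op src, which leaves nds dead and reusable as a scratch register before any stack spill becomes necessary. A sketch of that preference order follows, under the same illustration-only naming caveat (ScratchPlan and plan_for are not HotSpot code; the feature-check branch is omitted):

    #include <cstdio>

    struct ScratchPlan {
      const char* desc;   // which branch of the wrapper fires
      int spill_bytes;    // stack bytes reserved for saving live low registers
    };

    ScratchPlan plan_for(int dst, int nds, int src) {
      if (dst < 16 && src < 16) return {"emit directly as dst = dst op src", 0};
      if (dst < 16 && nds < 16) return {"copy src into nds, then dst = dst op nds", 0};
      if (src < 16 && nds < 16) return {"compute in nds, then copy nds back to dst", 0};
      if (dst < 16)             return {"park xmm0 in nds, stage src through xmm0", 0};
      return {"spill xmm1, park xmm0 in nds, compute in xmm0/xmm1", 64};
    }

    int main() {
      int triples[][3] = {{2, 3, 4}, {2, 3, 20}, {20, 3, 4}, {2, 20, 21}, {20, 21, 22}};
      for (auto& t : triples) {
        ScratchPlan p = plan_for(t[0], t[1], t[2]);
        printf("dst=%-2d nds=%-2d src=%-2d : %s (spill %d bytes)\n",
               t[0], t[1], t[2], p.desc, p.spill_bytes);
      }
      return 0;
    }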
*** 4636,4645 ****
--- 5014,5057 ----
      Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    }
  }
+ 
+ void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
+   int dst_enc = dst->encoding();
+   int src_enc = src->encoding();
+   if ((dst_enc < 16) && (src_enc < 16)) {
+     Assembler::vptest(dst, src);
+   } else if (src_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+     Assembler::vptest(xmm0, src);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else if (dst_enc < 16) {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     evmovdqul(xmm0, src, Assembler::AVX_512bit);
+     Assembler::vptest(dst, xmm0);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   } else {
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+     subptr(rsp, 64);
+     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+     movdqu(xmm0, src);
+     movdqu(xmm1, dst);
+     Assembler::vptest(xmm1, xmm0);
+     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+     addptr(rsp, 64);
+   }
+ }
+ 
  // This instruction exists within macros, ergo we cannot control its input
  // when emitted through those patterns.
  void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
    if (VM_Version::supports_avx512nobw()) {
      int dst_enc = dst->encoding();
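Note: unlike the pcmpeqb/pcmpeqw wrappers, the vptest wrapper above never copies a result back into dst, because (v)ptest only sets ZF/CF and leaves both register operands unchanged. A user-level illustration of that flags-only behaviour with plain SSE4.1 intrinsics (not HotSpot code; compile with -msse4.1):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128i a = _mm_set1_epi8(0);     // all zero bits
      __m128i b = _mm_set1_epi8(-1);    // all one bits
      // _mm_testz_si128 returns the ZF that PTEST would set,
      // i.e. 1 iff (a & b) == 0; neither input is modified.
      printf("ptest zero & ones -> ZF=%d\n", _mm_testz_si128(a, b));
      printf("ptest ones & ones -> ZF=%d\n", _mm_testz_si128(b, b));
      return 0;
    }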
*** 7719,7729 ****
    bind(COMPARE_WIDE_VECTORS_LOOP);
    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
      vmovdqu(vec1, Address(str1, result, scale));
      vpxor(vec1, Address(str2, result, scale));
    } else {
!     vpmovzxbw(vec1, Address(str1, result, scale1));
      vpxor(vec1, Address(str2, result, scale2));
    }
    vptest(vec1, vec1);
    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    addptr(result, stride2);
--- 8131,8141 ----
    bind(COMPARE_WIDE_VECTORS_LOOP);
    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
      vmovdqu(vec1, Address(str1, result, scale));
      vpxor(vec1, Address(str2, result, scale));
    } else {
!     vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit);
      vpxor(vec1, Address(str2, result, scale2));
    }
    vptest(vec1, vec1);
    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    addptr(result, stride2);
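Note: this hunk switches the mixed-encoding string-compare loop to the new MacroAssembler::vpmovzxbw wrapper, which takes an explicit vector_len; Assembler::AVX_256bit preserves the 256-bit widening load the loop relied on. Roughly, one iteration widens 16 Latin-1 bytes to 16 chars and XORs them against 16 chars of the other string; an AVX2-intrinsics rendering for illustration only (not the generated code; compile with -mavx2):

    #include <immintrin.h>
    #include <cstdio>
    #include <cstdint>

    int main() {
      const uint8_t  latin1[16] = {'H','e','l','l','o',',',' ','w','o','r','l','d','!','!','!','!'};
      const uint16_t utf16[16]  = {'H','e','l','l','o',',',' ','w','o','r','l','d','!','!','!','!'};

      // vpmovzxbw ymm, m128 at AVX_256bit width: 16 bytes -> 16 words.
      __m256i widened = _mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i*)latin1));
      __m256i other   = _mm256_loadu_si256((const __m256i*)utf16);
      __m256i diff    = _mm256_xor_si256(widened, other);   // vpxor
      // vptest: ZF=1 iff diff is all zero, i.e. the 16-char chunks are equal.
      printf("chunks equal: %d\n", _mm256_testz_si256(diff, diff));
      return 0;
    }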