/* * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package org.graalvm.compiler.lir.amd64; import jdk.vm.ci.amd64.AMD64; import jdk.vm.ci.amd64.AMD64.CPUFeature; import jdk.vm.ci.amd64.AMD64Kind; import jdk.vm.ci.code.Register; import jdk.vm.ci.meta.JavaKind; import jdk.vm.ci.meta.Value; import org.graalvm.compiler.asm.Label; import org.graalvm.compiler.asm.amd64.AMD64Address; import org.graalvm.compiler.asm.amd64.AMD64Assembler; import org.graalvm.compiler.asm.amd64.AMD64Assembler.VexMoveOp; import org.graalvm.compiler.asm.amd64.AMD64Assembler.VexRMIOp; import org.graalvm.compiler.asm.amd64.AMD64Assembler.VexRMOp; import org.graalvm.compiler.asm.amd64.AMD64Assembler.VexRVMOp; import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; import org.graalvm.compiler.asm.amd64.AVXKind; import org.graalvm.compiler.core.common.LIRKind; import org.graalvm.compiler.lir.LIRInstructionClass; import org.graalvm.compiler.lir.Opcode; import org.graalvm.compiler.lir.asm.CompilationResultBuilder; import org.graalvm.compiler.lir.gen.LIRGeneratorTool; import static jdk.vm.ci.code.ValueUtil.asRegister; import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; /** */ @Opcode("AMD64_ARRAY_INDEX_OF") public final class AMD64ArrayIndexOfOp extends AMD64LIRInstruction { public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AMD64ArrayIndexOfOp.class); private final JavaKind kind; private final int vmPageSize; private final int nValues; private final boolean findTwoConsecutive; private final AMD64Kind vectorKind; @Def({REG}) protected Value resultValue; @Alive({REG}) protected Value arrayPtrValue; @Use({REG}) protected Value arrayLengthValue; @Alive({REG}) protected Value searchValue1; @Alive({REG, ILLEGAL}) protected Value searchValue2; @Alive({REG, ILLEGAL}) protected Value searchValue3; @Alive({REG, ILLEGAL}) protected Value searchValue4; @Temp({REG}) protected Value arraySlotsRemaining; @Temp({REG}) protected Value comparisonResult1; @Temp({REG}) protected Value comparisonResult2; @Temp({REG}) protected Value comparisonResult3; @Temp({REG}) protected Value comparisonResult4; @Temp({REG, ILLEGAL}) protected Value vectorCompareVal1; @Temp({REG, ILLEGAL}) protected Value vectorCompareVal2; @Temp({REG, ILLEGAL}) protected Value vectorCompareVal3; @Temp({REG, ILLEGAL}) protected Value vectorCompareVal4; @Temp({REG, ILLEGAL}) protected Value vectorArray1; @Temp({REG, ILLEGAL}) protected Value vectorArray2; @Temp({REG, ILLEGAL}) protected Value vectorArray3; @Temp({REG, ILLEGAL}) protected Value vectorArray4; public AMD64ArrayIndexOfOp(JavaKind kind, boolean findTwoConsecutive, int vmPageSize, int maxVectorSize, LIRGeneratorTool tool, Value result, Value arrayPtr, Value arrayLength, Value... searchValues) { super(TYPE); this.kind = kind; this.findTwoConsecutive = findTwoConsecutive; this.vmPageSize = vmPageSize; assert 0 < searchValues.length && searchValues.length <= 4; assert byteMode(kind) || charMode(kind); assert supports(tool, CPUFeature.SSE2) || supports(tool, CPUFeature.AVX) || supportsAVX2(tool); nValues = searchValues.length; assert !findTwoConsecutive || nValues == 1; resultValue = result; arrayPtrValue = arrayPtr; arrayLengthValue = arrayLength; searchValue1 = searchValues[0]; searchValue2 = nValues > 1 ? searchValues[1] : Value.ILLEGAL; searchValue3 = nValues > 2 ? searchValues[2] : Value.ILLEGAL; searchValue4 = nValues > 3 ? searchValues[3] : Value.ILLEGAL; arraySlotsRemaining = tool.newVariable(LIRKind.value(AMD64Kind.DWORD)); comparisonResult1 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD)); comparisonResult2 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD)); comparisonResult3 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD)); comparisonResult4 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD)); vectorKind = supportsAVX2(tool) && (maxVectorSize < 0 || maxVectorSize >= 32) ? byteMode(kind) ? AMD64Kind.V256_BYTE : AMD64Kind.V256_WORD : byteMode(kind) ? AMD64Kind.V128_BYTE : AMD64Kind.V128_WORD; vectorCompareVal1 = tool.newVariable(LIRKind.value(vectorKind)); vectorCompareVal2 = nValues > 1 ? tool.newVariable(LIRKind.value(vectorKind)) : Value.ILLEGAL; vectorCompareVal3 = nValues > 2 ? tool.newVariable(LIRKind.value(vectorKind)) : Value.ILLEGAL; vectorCompareVal4 = nValues > 3 ? tool.newVariable(LIRKind.value(vectorKind)) : Value.ILLEGAL; vectorArray1 = tool.newVariable(LIRKind.value(vectorKind)); vectorArray2 = tool.newVariable(LIRKind.value(vectorKind)); vectorArray3 = tool.newVariable(LIRKind.value(vectorKind)); vectorArray4 = tool.newVariable(LIRKind.value(vectorKind)); } private static boolean byteMode(JavaKind kind) { return kind == JavaKind.Byte; } private static boolean charMode(JavaKind kind) { return kind == JavaKind.Char; } @Override public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler asm) { Register arrayPtr = asRegister(arrayPtrValue); Register arrayLength = asRegister(arrayLengthValue); Register result = asRegister(resultValue); Register slotsRemaining = asRegister(arraySlotsRemaining); Register[] searchValue = { nValues > 0 ? asRegister(searchValue1) : null, nValues > 1 ? asRegister(searchValue2) : null, nValues > 2 ? asRegister(searchValue3) : null, nValues > 3 ? asRegister(searchValue4) : null, }; Register[] vecCmp = { nValues > 0 ? asRegister(vectorCompareVal1) : null, nValues > 1 ? asRegister(vectorCompareVal2) : null, nValues > 2 ? asRegister(vectorCompareVal3) : null, nValues > 3 ? asRegister(vectorCompareVal4) : null, }; Register[] vecArray = { asRegister(vectorArray1), asRegister(vectorArray2), asRegister(vectorArray3), asRegister(vectorArray4), }; Register[] cmpResult = { asRegister(comparisonResult1), asRegister(comparisonResult2), asRegister(comparisonResult3), asRegister(comparisonResult4), }; Label retFound = new Label(); Label retNotFound = new Label(); Label end = new Label(); AVXKind.AVXSize vectorSize = AVXKind.getDataSize(vectorKind); int nVectors = nValues == 1 ? 4 : nValues == 2 ? 2 : 1; // load array length // important: this must be the first register manipulation, since arrayLengthValue is // annotated with @Use asm.movl(slotsRemaining, arrayLength); // load array pointer asm.movq(result, arrayPtr); // move search values to vectors for (int i = 0; i < nValues; i++) { if (asm.supports(CPUFeature.AVX)) { VexMoveOp.VMOVD.emit(asm, AVXKind.AVXSize.DWORD, vecCmp[i], searchValue[i]); } else { asm.movdl(vecCmp[i], searchValue[i]); } } // fill comparison vector with copies of the search value for (int i = 0; i < nValues; i++) { emitBroadcast(asm, findTwoConsecutive ? (byteMode(kind) ? JavaKind.Char : JavaKind.Int) : kind, vecCmp[i], vecArray[0], vectorSize); } emitArrayIndexOfChars(crb, asm, kind, vectorSize, result, slotsRemaining, searchValue, vecCmp, vecArray, cmpResult, retFound, retNotFound, vmPageSize, nValues, nVectors, findTwoConsecutive); // return -1 (no match) asm.bind(retNotFound); asm.movq(result, -1); asm.jmpb(end); asm.bind(retFound); // convert array pointer to offset asm.subq(result, arrayPtr); if (charMode(kind)) { asm.shrq(result, 1); } asm.bind(end); } private static void emitArrayIndexOfChars(CompilationResultBuilder crb, AMD64MacroAssembler asm, JavaKind kind, AVXKind.AVXSize vectorSize, Register arrayPtr, Register slotsRemaining, Register[] searchValue, Register[] vecCmp, Register[] vecArray, Register[] cmpResult, Label retFound, Label retNotFound, int vmPageSize, int nValues, int nVectors, boolean findTwoCharPrefix) { Label bulkVectorLoop = new Label(); Label singleVectorLoop = new Label(); Label[] vectorFound = { new Label(), new Label(), new Label(), new Label(), }; Label lessThanVectorSizeRemaining = new Label(); Label lessThanVectorSizeRemainingLoop = new Label(); Label bulkVectorLoopExit = nVectors == 1 ? lessThanVectorSizeRemaining : singleVectorLoop; int bytesPerVector = vectorSize.getBytes(); int arraySlotsPerVector = vectorSize.getBytes() / kind.getByteCount(); int singleVectorLoopCondition = arraySlotsPerVector; int bulkSize = arraySlotsPerVector * nVectors; int bulkSizeBytes = bytesPerVector * nVectors; int bulkLoopCondition = bulkSize; int[] vectorOffsets; JavaKind vectorCompareKind = kind; if (findTwoCharPrefix) { singleVectorLoopCondition++; bulkLoopCondition++; bulkSize /= 2; bulkSizeBytes /= 2; vectorOffsets = new int[]{0, kind.getByteCount(), bytesPerVector, bytesPerVector + kind.getByteCount()}; vectorCompareKind = byteMode(kind) ? JavaKind.Char : JavaKind.Int; } else { vectorOffsets = new int[]{0, bytesPerVector, bytesPerVector * 2, bytesPerVector * 3}; } // load copy of low part of array pointer Register tmpArrayPtrLow = cmpResult[0]; asm.movl(tmpArrayPtrLow, arrayPtr); // check if bulk vector load is in bounds asm.cmpl(slotsRemaining, bulkLoopCondition); asm.jcc(AMD64Assembler.ConditionFlag.Below, bulkVectorLoopExit); // check if array pointer is aligned to bulkSize asm.andl(tmpArrayPtrLow, bulkSizeBytes - 1); asm.jcc(AMD64Assembler.ConditionFlag.Zero, bulkVectorLoop); // do one unaligned bulk comparison pass and adjust alignment afterwards emitVectorCompare(asm, vectorCompareKind, vectorSize, nValues, nVectors, vectorOffsets, arrayPtr, vecCmp, vecArray, cmpResult, vectorFound, false); // load copy of low part of array pointer asm.movl(tmpArrayPtrLow, arrayPtr); // adjust array pointer asm.addq(arrayPtr, bulkSizeBytes); // adjust number of array slots remaining asm.subl(slotsRemaining, bulkSize); // get offset to bulk size alignment asm.andl(tmpArrayPtrLow, bulkSizeBytes - 1); emitBytesToArraySlots(asm, kind, tmpArrayPtrLow); // adjust array pointer to bulk size alignment asm.andq(arrayPtr, ~(bulkSizeBytes - 1)); // adjust number of array slots remaining asm.addl(slotsRemaining, tmpArrayPtrLow); // check if there are enough array slots remaining for the bulk loop asm.cmpl(slotsRemaining, bulkLoopCondition); asm.jcc(AMD64Assembler.ConditionFlag.Below, bulkVectorLoopExit); emitAlign(crb, asm); asm.bind(bulkVectorLoop); // memory-aligned bulk comparison emitVectorCompare(asm, vectorCompareKind, vectorSize, nValues, nVectors, vectorOffsets, arrayPtr, vecCmp, vecArray, cmpResult, vectorFound, !findTwoCharPrefix); // adjust number of array slots remaining asm.subl(slotsRemaining, bulkSize); // adjust array pointer asm.addq(arrayPtr, bulkSizeBytes); // check if there are enough array slots remaining for the bulk loop asm.cmpl(slotsRemaining, bulkLoopCondition); asm.jcc(AMD64Assembler.ConditionFlag.Below, bulkVectorLoopExit); // continue loop asm.jmp(bulkVectorLoop); if (nVectors > 1) { emitAlign(crb, asm); // same loop as bulkVectorLoop, with only one vector asm.bind(singleVectorLoop); // check if single vector load is in bounds asm.cmpl(slotsRemaining, singleVectorLoopCondition); asm.jcc(AMD64Assembler.ConditionFlag.Below, lessThanVectorSizeRemaining); // compare emitVectorCompare(asm, vectorCompareKind, vectorSize, nValues, findTwoCharPrefix ? 2 : 1, vectorOffsets, arrayPtr, vecCmp, vecArray, cmpResult, vectorFound, false); // adjust number of array slots remaining asm.subl(slotsRemaining, arraySlotsPerVector); // adjust array pointer asm.addq(arrayPtr, bytesPerVector); // continue loop asm.jmpb(singleVectorLoop); } asm.bind(lessThanVectorSizeRemaining); // check if any array slots remain asm.testl(slotsRemaining, slotsRemaining); asm.jcc(AMD64Assembler.ConditionFlag.Zero, retNotFound); // a vector compare will read out of bounds of the input array. // check if the out-of-bounds read would cross a memory page boundary. // load copy of low part of array pointer asm.movl(tmpArrayPtrLow, arrayPtr); // check if pointer + vector size would cross the page boundary asm.andl(tmpArrayPtrLow, (vmPageSize - 1)); asm.cmpl(tmpArrayPtrLow, (vmPageSize - (findTwoCharPrefix ? bytesPerVector + kind.getByteCount() : bytesPerVector))); // if the page boundary would be crossed, do byte/character-wise comparison instead. asm.jccb(AMD64Assembler.ConditionFlag.Above, lessThanVectorSizeRemainingLoop); Label[] overBoundsMatch = {new Label(), new Label()}; // otherwise, do a vector compare that reads beyond array bounds emitVectorCompare(asm, vectorCompareKind, vectorSize, nValues, findTwoCharPrefix ? 2 : 1, vectorOffsets, arrayPtr, vecCmp, vecArray, cmpResult, overBoundsMatch, false); // no match asm.jmp(retNotFound); if (findTwoCharPrefix) { Label overBoundsFinish = new Label(); asm.bind(overBoundsMatch[1]); // get match offset of second result asm.bsfq(cmpResult[1], cmpResult[1]); asm.addl(cmpResult[1], kind.getByteCount()); // replace first result with second and continue asm.movl(cmpResult[0], cmpResult[1]); asm.jmpb(overBoundsFinish); asm.bind(overBoundsMatch[0]); emitFindTwoCharPrefixMinResult(asm, kind, cmpResult, overBoundsFinish); } else { asm.bind(overBoundsMatch[0]); // find match offset asm.bsfq(cmpResult[0], cmpResult[0]); } // adjust array pointer for match result asm.addq(arrayPtr, cmpResult[0]); if (charMode(kind)) { // convert byte offset to chars asm.shrl(cmpResult[0], 1); } // check if offset of matched value is greater than number of bytes remaining / out of array // bounds if (findTwoCharPrefix) { asm.decrementl(slotsRemaining); } asm.cmpl(cmpResult[0], slotsRemaining); // match is out of bounds, return no match asm.jcc(AMD64Assembler.ConditionFlag.GreaterEqual, retNotFound); // adjust number of array slots remaining if (findTwoCharPrefix) { asm.incrementl(slotsRemaining, 1); } asm.subl(slotsRemaining, cmpResult[0]); // match is in bounds, return offset asm.jmp(retFound); // compare remaining slots in the array one-by-one asm.bind(lessThanVectorSizeRemainingLoop); // check if enough array slots remain asm.cmpl(slotsRemaining, findTwoCharPrefix ? 1 : 0); asm.jcc(AMD64Assembler.ConditionFlag.LessEqual, retNotFound); // load char / byte if (byteMode(kind)) { if (findTwoCharPrefix) { asm.movzwl(cmpResult[0], new AMD64Address(arrayPtr)); } else { asm.movzbl(cmpResult[0], new AMD64Address(arrayPtr)); } } else { if (findTwoCharPrefix) { asm.movl(cmpResult[0], new AMD64Address(arrayPtr)); } else { asm.movzwl(cmpResult[0], new AMD64Address(arrayPtr)); } } // check for match for (int i = 0; i < nValues; i++) { asm.cmpl(cmpResult[0], searchValue[i]); asm.jcc(AMD64Assembler.ConditionFlag.Equal, retFound); } // adjust number of array slots remaining asm.decrementl(slotsRemaining); // adjust array pointer asm.addq(arrayPtr, kind.getByteCount()); // continue loop asm.jmpb(lessThanVectorSizeRemainingLoop); for (int i = 1; i < nVectors; i += (findTwoCharPrefix ? 2 : 1)) { emitVectorFoundWithOffset(asm, kind, vectorOffsets[i], arrayPtr, cmpResult[i], slotsRemaining, vectorFound[i], retFound); } if (findTwoCharPrefix) { asm.bind(vectorFound[2]); asm.addq(arrayPtr, vectorOffsets[2]); // adjust number of array slots remaining asm.subl(slotsRemaining, charMode(kind) ? vectorOffsets[2] / 2 : vectorOffsets[2]); asm.movl(cmpResult[0], cmpResult[2]); asm.movl(cmpResult[1], cmpResult[3]); asm.bind(vectorFound[0]); emitFindTwoCharPrefixMinResult(asm, kind, cmpResult, new Label()); } else { asm.bind(vectorFound[0]); // find index of first set bit in bit mask asm.bsfq(cmpResult[0], cmpResult[0]); } // add offset to array pointer asm.addq(arrayPtr, cmpResult[0]); if (charMode(kind)) { // convert byte offset to chars asm.shrl(cmpResult[0], 1); } // adjust number of array slots remaining asm.subl(slotsRemaining, cmpResult[0]); asm.jmpb(retFound); } private static void emitFindTwoCharPrefixMinResult(AMD64MacroAssembler asm, JavaKind kind, Register[] cmpResult, Label done) { // find match offset asm.bsfq(cmpResult[0], cmpResult[0]); // check if second result is also a match asm.testl(cmpResult[1], cmpResult[1]); asm.jcc(AMD64Assembler.ConditionFlag.Zero, done); // get match offset of second result asm.bsfq(cmpResult[1], cmpResult[1]); asm.addl(cmpResult[1], kind.getByteCount()); // check if first result is less than second asm.cmpl(cmpResult[0], cmpResult[1]); asm.jcc(AMD64Assembler.ConditionFlag.LessEqual, done); // first result is greater than second, replace it with the second result asm.movl(cmpResult[0], cmpResult[1]); asm.bind(done); } private static void emitAlign(CompilationResultBuilder crb, AMD64MacroAssembler asm) { asm.align(crb.target.wordSize * 2); } /** * Fills {@code vecDst} with copies of its lowest byte, word or dword. */ private static void emitBroadcast(AMD64MacroAssembler asm, JavaKind kind, Register vecDst, Register vecTmp, AVXKind.AVXSize vectorSize) { switch (kind) { case Byte: if (asm.supports(CPUFeature.AVX2)) { VexRMOp.VPBROADCASTB.emit(asm, vectorSize, vecDst, vecDst); } else if (asm.supports(CPUFeature.AVX)) { VexRVMOp.VPXOR.emit(asm, vectorSize, vecTmp, vecTmp, vecTmp); VexRVMOp.VPSHUFB.emit(asm, vectorSize, vecDst, vecDst, vecTmp); } else if (asm.supports(CPUFeature.SSSE3)) { asm.pxor(vecTmp, vecTmp); asm.pshufb(vecDst, vecTmp); } else { // SSE2 asm.punpcklbw(vecDst, vecDst); asm.punpcklbw(vecDst, vecDst); asm.pshufd(vecDst, vecDst, 0); } break; case Short: case Char: if (asm.supports(CPUFeature.AVX2)) { VexRMOp.VPBROADCASTW.emit(asm, vectorSize, vecDst, vecDst); } else if (asm.supports(CPUFeature.AVX)) { VexRMIOp.VPSHUFLW.emit(asm, vectorSize, vecDst, vecDst, 0); VexRMIOp.VPSHUFD.emit(asm, vectorSize, vecDst, vecDst, 0); } else { // SSE asm.pshuflw(vecDst, vecDst, 0); asm.pshufd(vecDst, vecDst, 0); } break; case Int: if (asm.supports(CPUFeature.AVX2)) { VexRMOp.VPBROADCASTD.emit(asm, vectorSize, vecDst, vecDst); } else if (asm.supports(CPUFeature.AVX)) { VexRMIOp.VPSHUFD.emit(asm, vectorSize, vecDst, vecDst, 0); } else { // SSE asm.pshufd(vecDst, vecDst, 0); } break; default: throw new UnsupportedOperationException(); } } /** * Convert a byte offset stored in {@code bytes} to an array index offset. */ private static void emitBytesToArraySlots(AMD64MacroAssembler asm, JavaKind kind, Register bytes) { if (charMode(kind)) { asm.shrl(bytes, 1); } else { assert byteMode(kind); } } private static void emitVectorCompare(AMD64MacroAssembler asm, JavaKind kind, AVXKind.AVXSize vectorSize, int nValues, int nVectors, int[] vectorOffsets, Register arrayPtr, Register[] vecCmp, Register[] vecArray, Register[] cmpResult, Label[] vectorFound, boolean alignedLoad) { // load array contents into vectors for (int i = 0; i < nValues; i++) { for (int j = 0; j < nVectors; j++) { emitArrayLoad(asm, vectorSize, vecArray[(i * nVectors) + j], arrayPtr, vectorOffsets[j], alignedLoad); } } // compare all loaded bytes to the search value. // matching bytes are set to 0xff, non-matching bytes are set to 0x00. for (int i = 0; i < nValues; i++) { for (int j = 0; j < nVectors; j++) { emitVectorCompareInst(asm, kind, vectorSize, vecArray[(i * nVectors) + j], vecCmp[i]); } } // create 32-bit-masks from the most significant bit of every byte in the comparison // results. for (int i = 0; i < nValues * nVectors; i++) { emitMOVMSK(asm, vectorSize, cmpResult[i], vecArray[i]); } // join results of comparisons against multiple values for (int stride = 1; stride < nValues; stride *= 2) { for (int i = 0; i < nVectors; i++) { for (int j = 0; j + stride < nValues; j += stride * 2) { asm.orl(cmpResult[i + (j * nVectors)], cmpResult[i + ((j + stride) * nVectors)]); } } } // check if a match was found for (int i = 0; i < nVectors; i++) { asm.testl(cmpResult[i], cmpResult[i]); asm.jcc(AMD64Assembler.ConditionFlag.NotZero, vectorFound[i]); } } private static void emitVectorFoundWithOffset(AMD64MacroAssembler asm, JavaKind kind, int resultOffset, Register result, Register cmpResult, Register slotsRemaining, Label entry, Label ret) { asm.bind(entry); if (resultOffset > 0) { // adjust array pointer asm.addq(result, resultOffset); // adjust number of array slots remaining asm.subl(slotsRemaining, charMode(kind) ? resultOffset / 2 : resultOffset); } // find index of first set bit in bit mask asm.bsfq(cmpResult, cmpResult); // add offset to array pointer asm.addq(result, cmpResult); if (charMode(kind)) { // convert byte offset to chars asm.shrl(cmpResult, 1); } // adjust number of array slots remaining asm.subl(slotsRemaining, cmpResult); asm.jmpb(ret); } private static void emitArrayLoad(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register vecDst, Register arrayPtr, int offset, boolean alignedLoad) { AMD64Address src = new AMD64Address(arrayPtr, offset); if (asm.supports(CPUFeature.AVX)) { VexMoveOp loadOp = alignedLoad ? VexMoveOp.VMOVDQA : VexMoveOp.VMOVDQU; loadOp.emit(asm, vectorSize, vecDst, src); } else { // SSE asm.movdqu(vecDst, src); } } /** * Compares all packed bytes/words/dwords in {@code vecArray} to {@code vecCmp}. Matching values * are set to all ones (0xff, 0xffff, ...), non-matching values are set to zero. */ private static void emitVectorCompareInst(AMD64MacroAssembler asm, JavaKind kind, AVXKind.AVXSize vectorSize, Register vecArray, Register vecCmp) { switch (kind) { case Byte: if (asm.supports(CPUFeature.AVX)) { VexRVMOp.VPCMPEQB.emit(asm, vectorSize, vecArray, vecCmp, vecArray); } else { // SSE asm.pcmpeqb(vecArray, vecCmp); } break; case Short: case Char: if (asm.supports(CPUFeature.AVX)) { VexRVMOp.VPCMPEQW.emit(asm, vectorSize, vecArray, vecCmp, vecArray); } else { // SSE asm.pcmpeqw(vecArray, vecCmp); } break; case Int: if (asm.supports(CPUFeature.AVX)) { VexRVMOp.VPCMPEQD.emit(asm, vectorSize, vecArray, vecCmp, vecArray); } else { // SSE asm.pcmpeqd(vecArray, vecCmp); } break; default: throw new UnsupportedOperationException(); } } private static void emitMOVMSK(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register dst, Register vecSrc) { if (asm.supports(CPUFeature.AVX)) { VexRMOp.VPMOVMSKB.emit(asm, vectorSize, dst, vecSrc); } else { // SSE asm.pmovmskb(dst, vecSrc); } } private static boolean supportsAVX2(LIRGeneratorTool tool) { return supports(tool, CPUFeature.AVX2); } private static boolean supports(LIRGeneratorTool tool, CPUFeature cpuFeature) { return ((AMD64) tool.target().arch).getFeatures().contains(cpuFeature); } }