1 /*
2 * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24
25 package org.graalvm.compiler.lir.amd64;
26
27 import jdk.vm.ci.amd64.AMD64;
28 import jdk.vm.ci.amd64.AMD64.CPUFeature;
29 import jdk.vm.ci.amd64.AMD64Kind;
30 import jdk.vm.ci.code.Register;
31 import jdk.vm.ci.code.TargetDescription;
32 import jdk.vm.ci.meta.JavaKind;
33 import jdk.vm.ci.meta.Value;
34 import org.graalvm.compiler.asm.Label;
35 import org.graalvm.compiler.asm.amd64.AMD64Address;
36 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
37 import org.graalvm.compiler.asm.amd64.AMD64Assembler;
38 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
39 import org.graalvm.compiler.asm.amd64.AMD64Assembler.SSEOp;
40 import org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize;
41 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
42 import org.graalvm.compiler.asm.amd64.AVXKind;
43 import org.graalvm.compiler.core.common.LIRKind;
44 import org.graalvm.compiler.debug.GraalError;
45 import org.graalvm.compiler.lir.LIRInstructionClass;
46 import org.graalvm.compiler.lir.Opcode;
47 import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
48 import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
49
50 import static jdk.vm.ci.code.ValueUtil.asRegister;
51 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
52 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL;
53 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
54
55 import java.util.Objects;
56
57 /**
58 * Emits code which compares two arrays of the same length. If the CPU supports any vector
59 * instructions specialized code is emitted to leverage these instructions.
60 *
61 * This op can also compare arrays of different integer types (e.g. {@code byte[]} and
62 * {@code char[]}) with on-the-fly sign- or zero-extension. If one of the given arrays is a
63 * {@code char[]} array, the smaller elements are zero-extended, otherwise they are sign-extended.
64 */
65 @Opcode("ARRAY_EQUALS")
66 public final class AMD64ArrayEqualsOp extends AMD64LIRInstruction {
67 public static final LIRInstructionClass<AMD64ArrayEqualsOp> TYPE = LIRInstructionClass.create(AMD64ArrayEqualsOp.class);
68
69 private final JavaKind kind1;
70 private final JavaKind kind2;
71 private final int arrayBaseOffset1;
72 private final int arrayBaseOffset2;
73 private final Scale arrayIndexScale1;
74 private final Scale arrayIndexScale2;
75 private final AVXKind.AVXSize vectorSize;
76 private final int constantLength;
77 private final boolean signExtend;
78
79 @Def({REG}) private Value resultValue;
80 @Alive({REG}) private Value array1Value;
81 @Alive({REG}) private Value array2Value;
82 @Alive({REG}) private Value lengthValue;
83 @Temp({REG, ILLEGAL}) private Value temp1;
84 @Temp({REG, ILLEGAL}) private Value temp2;
85 @Temp({REG}) private Value temp3;
86 @Temp({REG, ILLEGAL}) private Value temp4;
87
88 @Temp({REG, ILLEGAL}) private Value temp5;
89 @Temp({REG, ILLEGAL}) private Value tempXMM;
90
91 @Temp({REG, ILLEGAL}) private Value vectorTemp1;
92 @Temp({REG, ILLEGAL}) private Value vectorTemp2;
93 @Temp({REG, ILLEGAL}) private Value vectorTemp3;
94 @Temp({REG, ILLEGAL}) private Value vectorTemp4;
95
96 public AMD64ArrayEqualsOp(LIRGeneratorTool tool, JavaKind kind1, JavaKind kind2, Value result, Value array1, Value array2, Value length,
97 int constantLength, boolean directPointers, int maxVectorSize) {
98 super(TYPE);
99 this.kind1 = kind1;
100 this.kind2 = kind2;
101 this.signExtend = kind1 != JavaKind.Char && kind2 != JavaKind.Char;
102
103 assert kind1.isNumericInteger() && kind2.isNumericInteger() || kind1 == kind2;
104
105 this.arrayBaseOffset1 = directPointers ? 0 : tool.getProviders().getMetaAccess().getArrayBaseOffset(kind1);
106 this.arrayBaseOffset2 = directPointers ? 0 : tool.getProviders().getMetaAccess().getArrayBaseOffset(kind2);
107 this.arrayIndexScale1 = Objects.requireNonNull(Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(kind1)));
108 this.arrayIndexScale2 = Objects.requireNonNull(Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(kind2)));
109 this.vectorSize = ((AMD64) tool.target().arch).getFeatures().contains(CPUFeature.AVX2) && (maxVectorSize < 0 || maxVectorSize >= 32) ? AVXKind.AVXSize.YMM : AVXKind.AVXSize.XMM;
110 this.constantLength = constantLength;
111
112 this.resultValue = result;
113 this.array1Value = array1;
114 this.array2Value = array2;
115 this.lengthValue = length;
116
117 // Allocate some temporaries.
118 if (supportsSSE41(tool.target()) && canGenerateConstantLengthCompare(tool.target()) && !constantLengthCompareNeedsTmpArrayPointers()) {
119 this.temp1 = Value.ILLEGAL;
120 this.temp2 = Value.ILLEGAL;
121 } else {
122 this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind()));
123 this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind()));
124 }
125 this.temp3 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
126 if (supportsSSE41(tool.target()) && canGenerateConstantLengthCompare(tool.target())) {
127 this.temp4 = Value.ILLEGAL;
128 this.temp5 = Value.ILLEGAL;
129 } else {
130 this.temp4 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
145 LIRKind lirKind = LIRKind.value(vectorSize == AVXKind.AVXSize.YMM ? AMD64Kind.V256_BYTE : AMD64Kind.V128_BYTE);
146 this.vectorTemp1 = tool.newVariable(lirKind);
147 this.vectorTemp2 = tool.newVariable(lirKind);
148 this.vectorTemp3 = tool.newVariable(lirKind);
149 this.vectorTemp4 = tool.newVariable(lirKind);
150 } else {
151 this.vectorTemp1 = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
152 this.vectorTemp2 = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
153 this.vectorTemp3 = Value.ILLEGAL;
154 this.vectorTemp4 = Value.ILLEGAL;
155 }
156 } else {
157 this.vectorTemp1 = Value.ILLEGAL;
158 this.vectorTemp2 = Value.ILLEGAL;
159 this.vectorTemp3 = Value.ILLEGAL;
160 this.vectorTemp4 = Value.ILLEGAL;
161 }
162 }
163
164 private boolean canGenerateConstantLengthCompare(TargetDescription target) {
165 return constantLength >= 0 && kind1.isNumericInteger() && (kind1 == kind2 || getElementsPerVector(AVXKind.AVXSize.XMM) <= constantLength) && supportsSSE41(target);
166 }
167
168 @Override
169 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
170 Register result = asRegister(resultValue);
171
172 Label trueLabel = new Label();
173 Label falseLabel = new Label();
174 Label done = new Label();
175
176 if (canGenerateConstantLengthCompare(crb.target)) {
177 emitConstantLengthArrayCompareBytes(crb, masm, new Register[]{asRegister(vectorTemp1), asRegister(vectorTemp2), asRegister(vectorTemp3), asRegister(vectorTemp4)}, falseLabel);
178 } else {
179 Register array1 = asRegister(temp1);
180 Register array2 = asRegister(temp2);
181 // Load array base addresses.
182 masm.leaq(array1, new AMD64Address(asRegister(array1Value), arrayBaseOffset1));
183 masm.leaq(array2, new AMD64Address(asRegister(array2Value), arrayBaseOffset2));
184 Register length = asRegister(temp3);
185 // Get array length.
186 masm.movl(length, asRegister(lengthValue));
187 // copy
188 masm.movl(result, length);
189 emitArrayCompare(crb, masm, result, array1, array2, length, trueLabel, falseLabel);
190 }
191
192 // Return true
193 masm.bind(trueLabel);
194 masm.movl(result, 1);
195 masm.jmpb(done);
196
197 // Return false
198 masm.bind(falseLabel);
199 masm.xorl(result, result);
200
201 // That's it
202 masm.bind(done);
203 }
204
205 private void emitArrayCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm,
206 Register result, Register array1, Register array2, Register length,
699 Register base1, Register base2, Register index, int offset, Label falseLabel, int range) {
700 assert kind1.isNumericFloat();
701 Label loop = new Label();
702 Register i = asRegister(temp5);
703
704 masm.movq(i, range);
705 masm.negq(i);
706 // Align the main loop
707 masm.align(crb.target.wordSize * 2);
708 masm.bind(loop);
709 emitFloatCompare(masm, base1, base2, index, offset, falseLabel, range == 1);
710 masm.incrementq(index, 1);
711 masm.incrementq(i, 1);
712 masm.jccb(ConditionFlag.NotZero, loop);
713 // Floats within the range are equal, revert change to the register index
714 masm.subq(index, range);
715 }
716
717 private boolean constantLengthCompareNeedsTmpArrayPointers() {
718 AVXKind.AVXSize vSize = vectorSize;
719 if (constantLength < getElementsPerVector(vectorSize)) {
720 vSize = AVXKind.AVXSize.XMM;
721 }
722 int vectorCount = constantLength & ~(2 * getElementsPerVector(vSize) - 1);
723 return vectorCount > 0;
724 }
725
726 /**
727 * Emits specialized assembly for checking equality of memory regions
728 * {@code arrayPtr1[0..nBytes]} and {@code arrayPtr2[0..nBytes]}. If they match, execution
729 * continues directly after the emitted code block, otherwise we jump to {@code noMatch}.
730 */
731 private void emitConstantLengthArrayCompareBytes(
732 CompilationResultBuilder crb,
733 AMD64MacroAssembler asm,
734 Register[] tmpVectors,
735 Label noMatch) {
736 if (constantLength == 0) {
737 // do nothing
738 return;
739 }
740 Register arrayPtr1 = asRegister(array1Value);
741 Register arrayPtr2 = asRegister(array2Value);
742 Register tmp = asRegister(temp3);
743 AVXKind.AVXSize vSize = vectorSize;
744 if (constantLength < getElementsPerVector(vectorSize)) {
745 vSize = AVXKind.AVXSize.XMM;
746 }
747 int elementsPerVector = getElementsPerVector(vSize);
748 if (elementsPerVector > constantLength) {
749 assert kind1 == kind2;
750 int byteLength = constantLength << arrayIndexScale1.log2;
751 // array is shorter than any vector register, use regular XOR instructions
752 int movSize = (byteLength < 2) ? 1 : ((byteLength < 4) ? 2 : ((byteLength < 8) ? 4 : 8));
753 emitMovBytes(asm, tmp, new AMD64Address(arrayPtr1, arrayBaseOffset1), movSize);
754 emitXorBytes(asm, tmp, new AMD64Address(arrayPtr2, arrayBaseOffset2), movSize);
755 asm.jccb(AMD64Assembler.ConditionFlag.NotZero, noMatch);
756 if (byteLength > movSize) {
757 emitMovBytes(asm, tmp, new AMD64Address(arrayPtr1, arrayBaseOffset1 + byteLength - movSize), movSize);
758 emitXorBytes(asm, tmp, new AMD64Address(arrayPtr2, arrayBaseOffset2 + byteLength - movSize), movSize);
759 asm.jccb(AMD64Assembler.ConditionFlag.NotZero, noMatch);
760 }
761 } else {
762 int elementsPerVectorLoop = 2 * elementsPerVector;
763 int tailCount = constantLength & (elementsPerVectorLoop - 1);
764 int vectorCount = constantLength & ~(elementsPerVectorLoop - 1);
765 int bytesPerVector = vSize.getBytes();
766 if (vectorCount > 0) {
767 Label loopBegin = new Label();
768 Register tmpArrayPtr1 = asRegister(temp1);
769 Register tmpArrayPtr2 = asRegister(temp2);
770 asm.leaq(tmpArrayPtr1, new AMD64Address(arrayPtr1, vectorCount << arrayIndexScale1.log2));
771 asm.leaq(tmpArrayPtr2, new AMD64Address(arrayPtr2, vectorCount << arrayIndexScale2.log2));
772 arrayPtr1 = tmpArrayPtr1;
773 arrayPtr2 = tmpArrayPtr2;
774 asm.movq(tmp, -vectorCount);
775 asm.align(crb.target.wordSize * 2);
776 asm.bind(loopBegin);
777 emitVectorLoad1(asm, tmpVectors[0], arrayPtr1, tmp, arrayBaseOffset1, vSize);
778 emitVectorLoad2(asm, tmpVectors[1], arrayPtr2, tmp, arrayBaseOffset2, vSize);
779 emitVectorLoad1(asm, tmpVectors[2], arrayPtr1, tmp, arrayBaseOffset1 + scaleDisplacement1(bytesPerVector), vSize);
780 emitVectorLoad2(asm, tmpVectors[3], arrayPtr2, tmp, arrayBaseOffset2 + scaleDisplacement2(bytesPerVector), vSize);
781 emitVectorXor(asm, tmpVectors[0], tmpVectors[1], vSize);
782 emitVectorXor(asm, tmpVectors[2], tmpVectors[3], vSize);
783 emitVectorTest(asm, tmpVectors[0], vSize);
784 asm.jccb(AMD64Assembler.ConditionFlag.NotZero, noMatch);
|
1 /*
2 * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24
25 package org.graalvm.compiler.lir.amd64;
26
27 import static jdk.vm.ci.code.ValueUtil.asRegister;
28 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
29 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.CONST;
30 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL;
31 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
32
33 import java.util.Objects;
34
35 import org.graalvm.compiler.asm.Label;
36 import org.graalvm.compiler.asm.amd64.AMD64Address;
37 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
38 import org.graalvm.compiler.asm.amd64.AMD64Assembler;
39 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
40 import org.graalvm.compiler.asm.amd64.AMD64Assembler.SSEOp;
41 import org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize;
42 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
43 import org.graalvm.compiler.asm.amd64.AVXKind;
44 import org.graalvm.compiler.core.common.LIRKind;
45 import org.graalvm.compiler.debug.GraalError;
46 import org.graalvm.compiler.lir.LIRInstructionClass;
47 import org.graalvm.compiler.lir.LIRValueUtil;
48 import org.graalvm.compiler.lir.Opcode;
49 import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
50 import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
51
52 import jdk.vm.ci.amd64.AMD64;
53 import jdk.vm.ci.amd64.AMD64.CPUFeature;
54 import jdk.vm.ci.amd64.AMD64Kind;
55 import jdk.vm.ci.code.Register;
56 import jdk.vm.ci.code.TargetDescription;
57 import jdk.vm.ci.meta.JavaKind;
58 import jdk.vm.ci.meta.Value;
59
60 /**
61 * Emits code which compares two arrays of the same length. If the CPU supports any vector
62 * instructions specialized code is emitted to leverage these instructions.
63 *
64 * This op can also compare arrays of different integer types (e.g. {@code byte[]} and
65 * {@code char[]}) with on-the-fly sign- or zero-extension. If one of the given arrays is a
66 * {@code char[]} array, the smaller elements are zero-extended, otherwise they are sign-extended.
67 */
68 @Opcode("ARRAY_EQUALS")
69 public final class AMD64ArrayEqualsOp extends AMD64LIRInstruction {
70 public static final LIRInstructionClass<AMD64ArrayEqualsOp> TYPE = LIRInstructionClass.create(AMD64ArrayEqualsOp.class);
71
72 private final JavaKind kind1;
73 private final JavaKind kind2;
74 private final int arrayBaseOffset1;
75 private final int arrayBaseOffset2;
76 private final Scale arrayIndexScale1;
77 private final Scale arrayIndexScale2;
78 private final AVXKind.AVXSize vectorSize;
79 private final boolean signExtend;
80
81 @Def({REG}) private Value resultValue;
82 @Alive({REG}) private Value array1Value;
83 @Alive({REG}) private Value array2Value;
84 @Alive({REG, CONST}) private Value lengthValue;
85 @Temp({REG, ILLEGAL}) private Value temp1;
86 @Temp({REG, ILLEGAL}) private Value temp2;
87 @Temp({REG}) private Value temp3;
88 @Temp({REG, ILLEGAL}) private Value temp4;
89
90 @Temp({REG, ILLEGAL}) private Value temp5;
91 @Temp({REG, ILLEGAL}) private Value tempXMM;
92
93 @Temp({REG, ILLEGAL}) private Value vectorTemp1;
94 @Temp({REG, ILLEGAL}) private Value vectorTemp2;
95 @Temp({REG, ILLEGAL}) private Value vectorTemp3;
96 @Temp({REG, ILLEGAL}) private Value vectorTemp4;
97
98 public AMD64ArrayEqualsOp(LIRGeneratorTool tool, JavaKind kind1, JavaKind kind2, Value result, Value array1, Value array2, Value length,
99 boolean directPointers, int maxVectorSize) {
100 super(TYPE);
101 this.kind1 = kind1;
102 this.kind2 = kind2;
103 this.signExtend = kind1 != JavaKind.Char && kind2 != JavaKind.Char;
104
105 assert kind1.isNumericInteger() && kind2.isNumericInteger() || kind1 == kind2;
106
107 this.arrayBaseOffset1 = directPointers ? 0 : tool.getProviders().getMetaAccess().getArrayBaseOffset(kind1);
108 this.arrayBaseOffset2 = directPointers ? 0 : tool.getProviders().getMetaAccess().getArrayBaseOffset(kind2);
109 this.arrayIndexScale1 = Objects.requireNonNull(Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(kind1)));
110 this.arrayIndexScale2 = Objects.requireNonNull(Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(kind2)));
111 this.vectorSize = ((AMD64) tool.target().arch).getFeatures().contains(CPUFeature.AVX2) && (maxVectorSize < 0 || maxVectorSize >= 32) ? AVXKind.AVXSize.YMM : AVXKind.AVXSize.XMM;
112
113 this.resultValue = result;
114 this.array1Value = array1;
115 this.array2Value = array2;
116 this.lengthValue = length;
117
118 // Allocate some temporaries.
119 if (supportsSSE41(tool.target()) && canGenerateConstantLengthCompare(tool.target()) && !constantLengthCompareNeedsTmpArrayPointers()) {
120 this.temp1 = Value.ILLEGAL;
121 this.temp2 = Value.ILLEGAL;
122 } else {
123 this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind()));
124 this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind()));
125 }
126 this.temp3 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
127 if (supportsSSE41(tool.target()) && canGenerateConstantLengthCompare(tool.target())) {
128 this.temp4 = Value.ILLEGAL;
129 this.temp5 = Value.ILLEGAL;
130 } else {
131 this.temp4 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
146 LIRKind lirKind = LIRKind.value(vectorSize == AVXKind.AVXSize.YMM ? AMD64Kind.V256_BYTE : AMD64Kind.V128_BYTE);
147 this.vectorTemp1 = tool.newVariable(lirKind);
148 this.vectorTemp2 = tool.newVariable(lirKind);
149 this.vectorTemp3 = tool.newVariable(lirKind);
150 this.vectorTemp4 = tool.newVariable(lirKind);
151 } else {
152 this.vectorTemp1 = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
153 this.vectorTemp2 = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
154 this.vectorTemp3 = Value.ILLEGAL;
155 this.vectorTemp4 = Value.ILLEGAL;
156 }
157 } else {
158 this.vectorTemp1 = Value.ILLEGAL;
159 this.vectorTemp2 = Value.ILLEGAL;
160 this.vectorTemp3 = Value.ILLEGAL;
161 this.vectorTemp4 = Value.ILLEGAL;
162 }
163 }
164
165 private boolean canGenerateConstantLengthCompare(TargetDescription target) {
166 return LIRValueUtil.isJavaConstant(lengthValue) && kind1.isNumericInteger() && (kind1 == kind2 || getElementsPerVector(AVXKind.AVXSize.XMM) <= constantLength()) && supportsSSE41(target);
167 }
168
169 private int constantLength() {
170 return LIRValueUtil.asJavaConstant(lengthValue).asInt();
171 }
172
173 @Override
174 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
175 Register result = asRegister(resultValue);
176
177 Label trueLabel = new Label();
178 Label falseLabel = new Label();
179 Label done = new Label();
180
181 if (canGenerateConstantLengthCompare(crb.target)) {
182 emitConstantLengthArrayCompareBytes(crb, masm, new Register[]{asRegister(vectorTemp1), asRegister(vectorTemp2), asRegister(vectorTemp3), asRegister(vectorTemp4)}, falseLabel);
183 } else {
184 Register array1 = asRegister(temp1);
185 Register array2 = asRegister(temp2);
186 // Load array base addresses.
187 masm.leaq(array1, new AMD64Address(asRegister(array1Value), arrayBaseOffset1));
188 masm.leaq(array2, new AMD64Address(asRegister(array2Value), arrayBaseOffset2));
189 Register length = asRegister(temp3);
190 // Get array length.
191 if (LIRValueUtil.isJavaConstant(lengthValue)) {
192 masm.movl(length, constantLength());
193 } else {
194 masm.movl(length, asRegister(lengthValue));
195 }
196 // copy
197 masm.movl(result, length);
198 emitArrayCompare(crb, masm, result, array1, array2, length, trueLabel, falseLabel);
199 }
200
201 // Return true
202 masm.bind(trueLabel);
203 masm.movl(result, 1);
204 masm.jmpb(done);
205
206 // Return false
207 masm.bind(falseLabel);
208 masm.xorl(result, result);
209
210 // That's it
211 masm.bind(done);
212 }
213
214 private void emitArrayCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm,
215 Register result, Register array1, Register array2, Register length,
708 Register base1, Register base2, Register index, int offset, Label falseLabel, int range) {
709 assert kind1.isNumericFloat();
710 Label loop = new Label();
711 Register i = asRegister(temp5);
712
713 masm.movq(i, range);
714 masm.negq(i);
715 // Align the main loop
716 masm.align(crb.target.wordSize * 2);
717 masm.bind(loop);
718 emitFloatCompare(masm, base1, base2, index, offset, falseLabel, range == 1);
719 masm.incrementq(index, 1);
720 masm.incrementq(i, 1);
721 masm.jccb(ConditionFlag.NotZero, loop);
722 // Floats within the range are equal, revert change to the register index
723 masm.subq(index, range);
724 }
725
726 private boolean constantLengthCompareNeedsTmpArrayPointers() {
727 AVXKind.AVXSize vSize = vectorSize;
728 if (constantLength() < getElementsPerVector(vectorSize)) {
729 vSize = AVXKind.AVXSize.XMM;
730 }
731 int vectorCount = constantLength() & ~(2 * getElementsPerVector(vSize) - 1);
732 return vectorCount > 0;
733 }
734
735 /**
736 * Emits specialized assembly for checking equality of memory regions
737 * {@code arrayPtr1[0..nBytes]} and {@code arrayPtr2[0..nBytes]}. If they match, execution
738 * continues directly after the emitted code block, otherwise we jump to {@code noMatch}.
739 */
740 private void emitConstantLengthArrayCompareBytes(
741 CompilationResultBuilder crb,
742 AMD64MacroAssembler asm,
743 Register[] tmpVectors,
744 Label noMatch) {
745 if (constantLength() == 0) {
746 // do nothing
747 return;
748 }
749 Register arrayPtr1 = asRegister(array1Value);
750 Register arrayPtr2 = asRegister(array2Value);
751 Register tmp = asRegister(temp3);
752 AVXKind.AVXSize vSize = vectorSize;
753 if (constantLength() < getElementsPerVector(vectorSize)) {
754 vSize = AVXKind.AVXSize.XMM;
755 }
756 int elementsPerVector = getElementsPerVector(vSize);
757 if (elementsPerVector > constantLength()) {
758 assert kind1 == kind2;
759 int byteLength = constantLength() << arrayIndexScale1.log2;
760 // array is shorter than any vector register, use regular XOR instructions
761 int movSize = (byteLength < 2) ? 1 : ((byteLength < 4) ? 2 : ((byteLength < 8) ? 4 : 8));
762 emitMovBytes(asm, tmp, new AMD64Address(arrayPtr1, arrayBaseOffset1), movSize);
763 emitXorBytes(asm, tmp, new AMD64Address(arrayPtr2, arrayBaseOffset2), movSize);
764 asm.jccb(AMD64Assembler.ConditionFlag.NotZero, noMatch);
765 if (byteLength > movSize) {
766 emitMovBytes(asm, tmp, new AMD64Address(arrayPtr1, arrayBaseOffset1 + byteLength - movSize), movSize);
767 emitXorBytes(asm, tmp, new AMD64Address(arrayPtr2, arrayBaseOffset2 + byteLength - movSize), movSize);
768 asm.jccb(AMD64Assembler.ConditionFlag.NotZero, noMatch);
769 }
770 } else {
771 int elementsPerVectorLoop = 2 * elementsPerVector;
772 int tailCount = constantLength() & (elementsPerVectorLoop - 1);
773 int vectorCount = constantLength() & ~(elementsPerVectorLoop - 1);
774 int bytesPerVector = vSize.getBytes();
775 if (vectorCount > 0) {
776 Label loopBegin = new Label();
777 Register tmpArrayPtr1 = asRegister(temp1);
778 Register tmpArrayPtr2 = asRegister(temp2);
779 asm.leaq(tmpArrayPtr1, new AMD64Address(arrayPtr1, vectorCount << arrayIndexScale1.log2));
780 asm.leaq(tmpArrayPtr2, new AMD64Address(arrayPtr2, vectorCount << arrayIndexScale2.log2));
781 arrayPtr1 = tmpArrayPtr1;
782 arrayPtr2 = tmpArrayPtr2;
783 asm.movq(tmp, -vectorCount);
784 asm.align(crb.target.wordSize * 2);
785 asm.bind(loopBegin);
786 emitVectorLoad1(asm, tmpVectors[0], arrayPtr1, tmp, arrayBaseOffset1, vSize);
787 emitVectorLoad2(asm, tmpVectors[1], arrayPtr2, tmp, arrayBaseOffset2, vSize);
788 emitVectorLoad1(asm, tmpVectors[2], arrayPtr1, tmp, arrayBaseOffset1 + scaleDisplacement1(bytesPerVector), vSize);
789 emitVectorLoad2(asm, tmpVectors[3], arrayPtr2, tmp, arrayBaseOffset2 + scaleDisplacement2(bytesPerVector), vSize);
790 emitVectorXor(asm, tmpVectors[0], tmpVectors[1], vSize);
791 emitVectorXor(asm, tmpVectors[2], tmpVectors[3], vSize);
792 emitVectorTest(asm, tmpVectors[0], vSize);
793 asm.jccb(AMD64Assembler.ConditionFlag.NotZero, noMatch);
|