1 /*
   2  * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 package org.graalvm.compiler.lir.aarch64;
  26 
  27 import static jdk.vm.ci.aarch64.AArch64.zr;
  28 import static jdk.vm.ci.code.ValueUtil.asRegister;
  29 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
  30 
  31 import org.graalvm.compiler.asm.Label;
  32 import org.graalvm.compiler.asm.aarch64.AArch64Address;
  33 import org.graalvm.compiler.asm.aarch64.AArch64Assembler.ConditionFlag;
  34 import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler;
  35 import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.ScratchRegister;
  36 import org.graalvm.compiler.core.common.LIRKind;
  37 import org.graalvm.compiler.lir.LIRInstructionClass;
  38 import org.graalvm.compiler.lir.Opcode;
  39 import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
  40 import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
  41 
  42 import jdk.vm.ci.code.Register;
  43 import jdk.vm.ci.meta.JavaKind;
  44 import jdk.vm.ci.meta.Value;
  45 
  46 /**
  47  * Emits code which compares two arrays of the same length. If the CPU supports any vector
  48  * instructions specialized code is emitted to leverage these instructions.
  49  */
  50 @Opcode("ARRAY_EQUALS")
  51 public final class AArch64ArrayEqualsOp extends AArch64LIRInstruction {
  52     public static final LIRInstructionClass<AArch64ArrayEqualsOp> TYPE = LIRInstructionClass.create(AArch64ArrayEqualsOp.class);
  53 
  54     private final JavaKind kind;
  55     private final int arrayBaseOffset;
  56     private final int arrayIndexScale;
  57 
  58     @Def({REG}) protected Value resultValue;
  59     @Alive({REG}) protected Value array1Value;
  60     @Alive({REG}) protected Value array2Value;
  61     @Alive({REG}) protected Value lengthValue;
  62     @Temp({REG}) protected Value temp1;
  63     @Temp({REG}) protected Value temp2;
  64     @Temp({REG}) protected Value temp3;
  65     @Temp({REG}) protected Value temp4;
  66 
  67     public AArch64ArrayEqualsOp(LIRGeneratorTool tool, JavaKind kind, Value result, Value array1, Value array2, Value length, boolean directPointers) {
  68         super(TYPE);
  69 
  70         assert !kind.isNumericFloat() : "Float arrays comparison (bitwise_equal || both_NaN) isn't supported";
  71         this.kind = kind;
  72 
  73         this.arrayBaseOffset = directPointers ? 0 : tool.getProviders().getMetaAccess().getArrayBaseOffset(kind);
  74         this.arrayIndexScale = tool.getProviders().getMetaAccess().getArrayIndexScale(kind);
  75 
  76         this.resultValue = result;
  77         this.array1Value = array1;
  78         this.array2Value = array2;
  79         this.lengthValue = length;
  80 
  81         // Allocate some temporaries.
  82         this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind()));
  83         this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind()));
  84         this.temp3 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
  85         this.temp4 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
  86     }
  87 
  88     @Override
  89     public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
  90         Register result = asRegister(resultValue);
  91         Register array1 = asRegister(temp1);
  92         Register array2 = asRegister(temp2);
  93         Register length = asRegister(temp3);
  94 
  95         Label breakLabel = new Label();
  96 
  97         try (ScratchRegister sc1 = masm.getScratchRegister()) {
  98             Register rscratch1 = sc1.getRegister();
  99             // Load array base addresses.
 100             masm.lea(array1, AArch64Address.createUnscaledImmediateAddress(asRegister(array1Value), arrayBaseOffset));
 101             masm.lea(array2, AArch64Address.createUnscaledImmediateAddress(asRegister(array2Value), arrayBaseOffset));
 102 
 103             // Get array length in bytes.
 104             masm.mov(rscratch1, arrayIndexScale);
 105             masm.smaddl(length, asRegister(lengthValue), rscratch1, zr);
 106             masm.mov(64, result, length); // copy
 107 
 108             emit8ByteCompare(crb, masm, result, array1, array2, length, breakLabel, rscratch1);
 109             emitTailCompares(masm, result, array1, array2, breakLabel, rscratch1);
 110 
 111             // Return: rscratch1 is non-zero iff the arrays differ
 112             masm.bind(breakLabel);
 113             masm.cmp(64, rscratch1, zr);
 114             masm.cset(result, ConditionFlag.EQ);
 115         }
 116     }
 117 
 118     /**
 119      * Vector size used in {@link #emit8ByteCompare}.
 120      */
 121     private static final int VECTOR_SIZE = 8;
 122 
 123     /**
 124      * Emits code that uses 8-byte vector compares.
 125      *
 126      */
 127     private void emit8ByteCompare(CompilationResultBuilder crb, AArch64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label breakLabel,
 128                     Register rscratch1) {
 129         Label loop = new Label();
 130         Label compareTail = new Label();
 131 
 132         Register temp = asRegister(temp4);
 133 
 134         masm.and(64, result, result, VECTOR_SIZE - 1); // tail count (in bytes)
 135         masm.ands(64, length, length, ~(VECTOR_SIZE - 1));  // vector count (in bytes)
 136         masm.branchConditionally(ConditionFlag.EQ, compareTail);
 137 
 138         masm.lea(array1, AArch64Address.createRegisterOffsetAddress(array1, length, false));
 139         masm.lea(array2, AArch64Address.createRegisterOffsetAddress(array2, length, false));
 140         masm.sub(64, length, zr, length);
 141 
 142         // Align the main loop
 143         masm.align(crb.target.wordSize * 2);
 144         masm.bind(loop);
 145         masm.ldr(64, temp, AArch64Address.createRegisterOffsetAddress(array1, length, false));
 146         masm.ldr(64, rscratch1, AArch64Address.createRegisterOffsetAddress(array2, length, false));
 147         masm.eor(64, rscratch1, temp, rscratch1);
 148         masm.cbnz(64, rscratch1, breakLabel);
 149         masm.add(64, length, length, VECTOR_SIZE);
 150         masm.cbnz(64, length, loop);
 151 
 152         masm.cbz(64, result, breakLabel);
 153 
 154         /*
 155          * Compare the remaining bytes with an unaligned memory load aligned to the end of the
 156          * array.
 157          */
 158         masm.lea(array1, AArch64Address.createUnscaledImmediateAddress(array1, -VECTOR_SIZE));
 159         masm.lea(array2, AArch64Address.createUnscaledImmediateAddress(array2, -VECTOR_SIZE));
 160         masm.ldr(64, temp, AArch64Address.createRegisterOffsetAddress(array1, result, false));
 161         masm.ldr(64, rscratch1, AArch64Address.createRegisterOffsetAddress(array2, result, false));
 162         masm.eor(64, rscratch1, temp, rscratch1);
 163         masm.jmp(breakLabel);
 164 
 165         masm.bind(compareTail);
 166     }
 167 
 168     /**
 169      * Emits code to compare the remaining 1 to 4 bytes.
 170      *
 171      */
 172     private void emitTailCompares(AArch64MacroAssembler masm, Register result, Register array1, Register array2, Label breakLabel, Register rscratch1) {
 173         Label compare2Bytes = new Label();
 174         Label compare1Byte = new Label();
 175         Label end = new Label();
 176 
 177         Register temp = asRegister(temp4);
 178 
 179         if (kind.getByteCount() <= 4) {
 180             // Compare trailing 4 bytes, if any.
 181             masm.ands(32, zr, result, 4);
 182             masm.branchConditionally(ConditionFlag.EQ, compare2Bytes);
 183             masm.ldr(32, temp, AArch64Address.createPostIndexedImmediateAddress(array1, 4));
 184             masm.ldr(32, rscratch1, AArch64Address.createPostIndexedImmediateAddress(array2, 4));
 185             masm.eor(32, rscratch1, temp, rscratch1);
 186             masm.cbnz(32, rscratch1, breakLabel);
 187 
 188             if (kind.getByteCount() <= 2) {
 189                 // Compare trailing 2 bytes, if any.
 190                 masm.bind(compare2Bytes);
 191                 masm.ands(32, zr, result, 2);
 192                 masm.branchConditionally(ConditionFlag.EQ, compare1Byte);
 193                 masm.ldr(16, temp, AArch64Address.createPostIndexedImmediateAddress(array1, 2));
 194                 masm.ldr(16, rscratch1, AArch64Address.createPostIndexedImmediateAddress(array2, 2));
 195                 masm.eor(32, rscratch1, temp, rscratch1);
 196                 masm.cbnz(32, rscratch1, breakLabel);
 197 
 198                 // The one-byte tail compare is only required for boolean and byte arrays.
 199                 if (kind.getByteCount() <= 1) {
 200                     // Compare trailing byte, if any.
 201                     masm.bind(compare1Byte);
 202                     masm.ands(32, zr, result, 1);
 203                     masm.branchConditionally(ConditionFlag.EQ, end);
 204                     masm.ldr(8, temp, AArch64Address.createBaseRegisterOnlyAddress(array1));
 205                     masm.ldr(8, rscratch1, AArch64Address.createBaseRegisterOnlyAddress(array2));
 206                     masm.eor(32, rscratch1, temp, rscratch1);
 207                     masm.cbnz(32, rscratch1, breakLabel);
 208                 } else {
 209                     masm.bind(compare1Byte);
 210                 }
 211             } else {
 212                 masm.bind(compare2Bytes);
 213             }
 214         }
 215         masm.bind(end);
 216         masm.mov(64, rscratch1, zr);
 217     }
 218 }