/*
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2019, Arm Limited and affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */


package org.graalvm.compiler.lir.aarch64;

import static jdk.vm.ci.aarch64.AArch64.zr;
import static jdk.vm.ci.code.ValueUtil.asRegister;
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.aarch64.AArch64Address;
import org.graalvm.compiler.asm.aarch64.AArch64Assembler;
import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;

import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.AllocatableValue;

/**
 * Zero a chunk of memory on AArch64.
 */
@Opcode("ZERO_MEMORY")
public final class AArch64ZeroMemoryOp extends AArch64LIRInstruction {
    public static final LIRInstructionClass<AArch64ZeroMemoryOp> TYPE = LIRInstructionClass.create(AArch64ZeroMemoryOp.class);

    @Use({REG}) protected AllocatableValue addressValue;
    @Use({REG}) protected AllocatableValue lengthValue;

    private final boolean useDcZva;
    private final int zvaLength;

    /**
     * Constructor of AArch64ZeroMemoryOp.
     *
     * @param address allocatable 8-byte aligned base address of the memory chunk.
     * @param length allocatable length of the memory chunk, in bytes; the value must be a multiple of 8.
     * @param useDcZva whether the DC ZVA instruction can be used to zero memory.
     * @param zvaLength the ZVA length of the current AArch64 CPU; a negative value indicates the
     *            length is unknown at compile time.
     */
    public AArch64ZeroMemoryOp(AllocatableValue address, AllocatableValue length, boolean useDcZva, int zvaLength) {
        super(TYPE);
        this.addressValue = address;
        this.lengthValue = length;
        this.useDcZva = useDcZva;
        this.zvaLength = zvaLength;
    }

    @Override
    protected void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        Register base = asRegister(addressValue);
        Register size = asRegister(lengthValue);
        if (useDcZva && zvaLength > 0) {
            // From ARMv8-A architecture reference manual D12.2.35 Data Cache Zero ID register:
            // A valid ZVA length should be a power-of-2 value in [4, 2048]
            assert (CodeUtil.isPowerOf2(zvaLength) && 4 <= zvaLength && zvaLength <= 2048);
            emitZeroMemoryWithDc(masm, base, size, zvaLength);
        } else {
            // Use store pair instructions (STP) to zero memory as a fallback.
            emitZeroMemoryWithStp(masm, base, size);
        }
    }

    /**
     * Zero a chunk of memory with DC ZVA instructions.
     *
     * @param masm the AArch64 macro assembler.
     * @param base the 8-byte aligned base address of the memory chunk to be zeroed.
     * @param size size of the memory chunk to be zeroed, in bytes; must be a multiple of 8.
     * @param zvaLength the ZVA length of the current AArch64 CPU.
     */
    private static void emitZeroMemoryWithDc(AArch64MacroAssembler masm, Register base, Register size, int zvaLength) {
        Label preLoop = new Label();
        Label zvaLoop = new Label();
        Label postLoop = new Label();
        Label tail = new Label();
        Label done = new Label();

        try (AArch64MacroAssembler.ScratchRegister sc1 = masm.getScratchRegister()) {
            Register rscratch1 = sc1.getRegister();
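            // The chunk is zeroed in three phases: a pre-loop of 8-byte stores until the base
            // address is aligned to the ZVA length, a main loop in which each DC ZVA instruction
            // zeroes one naturally aligned block of zvaLength bytes, and a post-loop of 8-byte
            // stores for the remaining tail.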

            // Count number of bytes to be pre-zeroed to align base address with ZVA length.
            masm.neg(64, rscratch1, base);
            masm.and(64, rscratch1, rscratch1, zvaLength - 1);
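            // Since zvaLength is a power of 2, (-base) & (zvaLength - 1) is the distance from base
            // to the next ZVA-aligned address, or zero if base is already aligned.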

            // Is size less than or equal to the number of bytes to be pre-zeroed? Jump to
            // POST_LOOP if so and zero the whole chunk 8 bytes at a time.
            masm.cmp(64, size, rscratch1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LE, postLoop);
            masm.sub(64, size, size, rscratch1);

            // Pre-ZVA loop.
            masm.bind(preLoop);
            masm.subs(64, rscratch1, rscratch1, 8);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, zvaLoop);
            masm.str(64, zr, AArch64Address.createPostIndexedImmediateAddress(base, 8));
            masm.jmp(preLoop);

            // ZVA loop.
            masm.bind(zvaLoop);
            masm.subs(64, size, size, zvaLength);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, tail);
            masm.dc(AArch64Assembler.DataCacheOperationType.ZVA, base);
            masm.add(64, base, base, zvaLength);
            masm.jmp(zvaLoop);

            // Handle bytes after ZVA loop.
            masm.bind(tail);
            masm.add(64, size, size, zvaLength);
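            // size now holds the number of remaining bytes, which is less than zvaLength; the
            // post loop below clears them 8 bytes at a time.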

            // Post-ZVA loop.
            masm.bind(postLoop);
            masm.subs(64, size, size, 8);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, done);
            masm.str(64, zr, AArch64Address.createPostIndexedImmediateAddress(base, 8));
            masm.jmp(postLoop);

            // Done.
            masm.bind(done);
        }
    }

    /**
     * Zero a chunk of memory with STP instructions.
     *
     * @param masm the AArch64 macro assembler.
     * @param base the 8-byte aligned base address of the memory chunk to be zeroed.
     * @param size size of the memory chunk to be zeroed, in bytes; must be a multiple of 8.
     */
    private static void emitZeroMemoryWithStp(AArch64MacroAssembler masm, Register base, Register size) {
        Label loop = new Label();
        Label tail = new Label();
        Label done = new Label();

        // Jump to DONE if size is zero.
        masm.cbz(64, size, done);
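
        // From here on the chunk is zeroed in three steps: an optional single 8-byte store that
        // brings base to 16-byte alignment (base is 8-byte aligned by contract, so testing bit 3
        // is enough), a loop of 16-byte STP stores, and an optional 8-byte store for the tail.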

        // Is the base address already 16-byte aligned? Jump to the STP loop if so.
        masm.tbz(base, 3, loop);
        masm.sub(64, size, size, 8);
        masm.str(64, zr, AArch64Address.createPostIndexedImmediateAddress(base, 8));

        // The STP loop that zeros 16 bytes in each iteration.
        masm.bind(loop);
        masm.subs(64, size, size, 16);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, tail);
        masm.stp(64, zr, zr, AArch64Address.createPostIndexedImmediateAddress(base, 2));
        masm.jmp(loop);

        // We may need to zero the tail 8 bytes of the memory chunk.
        masm.bind(tail);
        masm.adds(64, size, size, 16);
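        // Because the length is a multiple of 8, size is now either 0 or 8: EQ means the STP loop
        // covered everything, otherwise one more 8-byte store finishes the chunk.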
        masm.branchConditionally(AArch64Assembler.ConditionFlag.EQ, done);
        masm.str(64, zr, AArch64Address.createPostIndexedImmediateAddress(base, 8));

        // Done.
        masm.bind(done);
    }
}