1 /* 2 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2019, Arm Limited and affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 */ 24 25 26 package org.graalvm.compiler.lir.aarch64; 27 28 import static jdk.vm.ci.aarch64.AArch64.zr; 29 import static jdk.vm.ci.code.ValueUtil.asRegister; 30 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; 31 32 import org.graalvm.compiler.asm.Label; 33 import org.graalvm.compiler.asm.aarch64.AArch64Address; 34 import org.graalvm.compiler.asm.aarch64.AArch64Assembler; 35 import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler; 36 import org.graalvm.compiler.lir.LIRInstructionClass; 37 import org.graalvm.compiler.lir.Opcode; 38 import org.graalvm.compiler.lir.asm.CompilationResultBuilder; 39 40 import jdk.vm.ci.code.CodeUtil; 41 import jdk.vm.ci.code.Register; 42 import jdk.vm.ci.meta.AllocatableValue; 43 44 /** 45 * Zero a chunk of memory on AArch64. 
 */
@Opcode("ZERO_MEMORY")
public final class AArch64ZeroMemoryOp extends AArch64LIRInstruction {
    public static final LIRInstructionClass<AArch64ZeroMemoryOp> TYPE = LIRInstructionClass.create(AArch64ZeroMemoryOp.class);

    /** 8-byte aligned base address of the memory chunk to be zeroed. */
    @Use({REG}) protected AllocatableValue addressValue;
    /** Length of the memory chunk in bytes; must be a multiple of 8. */
    @Use({REG}) protected AllocatableValue lengthValue;

    /** Whether the DC ZVA instruction may be used for zeroing. */
    private final boolean useDcZva;
    /** ZVA block length of the target CPU; non-positive means unknown at compile time. */
    private final int zvaLength;

    /**
     * Constructor of AArch64ZeroMemoryOp.
     *
     * @param address allocatable 8-byte aligned base address of the memory chunk.
     * @param length allocatable length of the memory chunk; the value must be a multiple of 8.
     * @param useDcZva whether the DC ZVA instruction can be used for zeroing.
     * @param zvaLength the ZVA length info of the current AArch64 CPU; a non-positive value
     *            indicates the length is unknown at compile time, in which case the STP
     *            fallback is emitted instead.
     */
    public AArch64ZeroMemoryOp(AllocatableValue address, AllocatableValue length, boolean useDcZva, int zvaLength) {
        super(TYPE);
        this.addressValue = address;
        this.lengthValue = length;
        this.useDcZva = useDcZva;
        this.zvaLength = zvaLength;
    }

    @Override
    protected void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        Register base = asRegister(addressValue);
        Register size = asRegister(lengthValue);
        if (useDcZva && zvaLength > 0) {
            // From ARMv8-A architecture reference manual D12.2.35 Data Cache Zero ID register:
            // A valid ZVA length should be a power-of-2 value in [4, 2048]
            assert (CodeUtil.isPowerOf2(zvaLength) && 4 <= zvaLength && zvaLength <= 2048);
            emitZeroMemoryWithDc(masm, base, size, zvaLength);
        } else {
            // Use store pair instructions (STP) to zero memory as a fallback.
            emitZeroMemoryWithStp(masm, base, size);
        }
    }

    /**
     * Zero a chunk of memory with DC ZVA instructions, each of which zeroes a whole
     * ZVA-length-aligned block at once.
     *
     * The emitted code has three phases: a pre-loop that zeroes 8 bytes at a time until
     * {@code base} reaches a ZVA-length boundary, a main loop that zeroes one ZVA block per
     * iteration, and a post-loop that zeroes the remaining tail 8 bytes at a time. Both
     * {@code base} and {@code size} registers are clobbered.
     *
     * @param masm the AArch64 macro assembler.
     * @param base 8-byte aligned base address of the memory chunk to be zeroed.
     * @param size size of the memory chunk to be zeroed, in bytes; must be a multiple of 8.
     * @param zvaLength the ZVA length info of the current AArch64 CPU.
     */
    private static void emitZeroMemoryWithDc(AArch64MacroAssembler masm, Register base, Register size, int zvaLength) {
        Label preLoop = new Label();
        Label zvaLoop = new Label();
        Label postLoop = new Label();
        Label tail = new Label();
        Label done = new Label();

        try (AArch64MacroAssembler.ScratchRegister sc1 = masm.getScratchRegister()) {
            Register rscratch1 = sc1.getRegister();

            // Count number of bytes to be pre-zeroed to align base address with ZVA length:
            // rscratch1 = (-base) & (zvaLength - 1), i.e. the distance from base up to the
            // next ZVA-length boundary (0 if base is already aligned).
            masm.neg(64, rscratch1, base);
            masm.and(64, rscratch1, rscratch1, zvaLength - 1);

            // Is size less than or equal to the number of bytes to be pre-zeroed? Then the
            // whole chunk ends before the first ZVA boundary: jump straight to POST_LOOP and
            // zero it 8 bytes at a time.
            masm.cmp(64, size, rscratch1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LE, postLoop);
            masm.sub(64, size, size, rscratch1);

            // Pre-ZVA loop: zero 8 bytes per iteration until base is ZVA-length aligned.
            masm.bind(preLoop);
            masm.subs(64, rscratch1, rscratch1, 8);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, zvaLoop);
            masm.str(64, zr, AArch64Address.createPostIndexedImmediateAddress(base, 8));
            masm.jmp(preLoop);

            // ZVA loop: zero one ZVA-length block per iteration while at least one whole
            // block remains.
            masm.bind(zvaLoop);
            masm.subs(64, size, size, zvaLength);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, tail);
            masm.dc(AArch64Assembler.DataCacheOperationType.ZVA, base);
            masm.add(64, base, base, zvaLength);
            masm.jmp(zvaLoop);

            // Handle bytes after ZVA loop: the last subtraction overshot, so add zvaLength
            // back; size again holds the number of remaining bytes, in [0, zvaLength).
            masm.bind(tail);
            masm.add(64, size, size, zvaLength);

            // Post-ZVA loop: zero the remaining tail 8 bytes per iteration.
            masm.bind(postLoop);
            masm.subs(64, size, size, 8);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, done);
            masm.str(64, zr, AArch64Address.createPostIndexedImmediateAddress(base, 8));
            masm.jmp(postLoop);

            // Done.
            masm.bind(done);
        }
    }

    /**
     * Zero a chunk of memory with STP instructions, 16 bytes per iteration. Both
     * {@code base} and {@code size} registers are clobbered.
     *
     * @param masm the AArch64 macro assembler.
     * @param base 8-byte aligned base address of the memory chunk to be zeroed.
     * @param size size of the memory chunk to be zeroed, in bytes; must be a multiple of 8.
     */
    private static void emitZeroMemoryWithStp(AArch64MacroAssembler masm, Register base, Register size) {
        Label loop = new Label();
        Label tail = new Label();
        Label done = new Label();

        // Jump to DONE if size is zero.
        masm.cbz(64, size, done);

        // Is base address already 16-byte aligned? Jump to the STP loop if so; otherwise
        // zero 8 bytes first so the paired stores below are 16-byte aligned.
        masm.tbz(base, 3, loop);
        masm.sub(64, size, size, 8);
        masm.str(64, zr, AArch64Address.createPostIndexedImmediateAddress(base, 8));

        // The STP loop that zeros 16 bytes in each iteration. The post-index immediate 2 is
        // in units of the 8-byte transfer size, advancing base by 16 bytes to match the 16
        // subtracted from size (see AArch64Address pair-offset scaling).
        masm.bind(loop);
        masm.subs(64, size, size, 16);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, tail);
        masm.stp(64, zr, zr, AArch64Address.createPostIndexedImmediateAddress(base, 2));
        masm.jmp(loop);

        // We may need to zero the tail 8 bytes of the memory chunk: here size is
        // remaining - 16, which is -16 (nothing left) or -8 (exactly 8 bytes left, since
        // size is a multiple of 8).
        masm.bind(tail);
        masm.adds(64, size, size, 16);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.EQ, done);
        masm.str(64, zr, AArch64Address.createPostIndexedImmediateAddress(base, 8));

        // Done.
        masm.bind(done);
    }
}