1 /*
   2  * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.graalvm.compiler.asm.aarch64;
  25 
  26 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.BASE_REGISTER_ONLY;
  27 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.EXTENDED_REGISTER_OFFSET;
  28 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SCALED;
  29 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_UNSCALED;
  30 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.REGISTER_OFFSET;
  31 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_BASE;
  32 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_INDEX;
  33 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.NO_WORK;
  34 import static jdk.vm.ci.aarch64.AArch64.CPU;
  35 import static jdk.vm.ci.aarch64.AArch64.r8;
  36 import static jdk.vm.ci.aarch64.AArch64.r9;
  37 import static jdk.vm.ci.aarch64.AArch64.sp;
  38 import static jdk.vm.ci.aarch64.AArch64.zr;
  39 
  40 import org.graalvm.compiler.asm.AbstractAddress;
  41 import org.graalvm.compiler.asm.Label;
  42 import org.graalvm.compiler.asm.NumUtil;
  43 import org.graalvm.compiler.debug.GraalError;
  44 
  45 import jdk.vm.ci.aarch64.AArch64;
  46 import jdk.vm.ci.code.Register;
  47 import jdk.vm.ci.code.TargetDescription;
  48 
  49 public class AArch64MacroAssembler extends AArch64Assembler {
  50 
    /**
     * Pool of scratch registers handed out by {@link #getScratchRegister()}. It holds exactly two
     * entries, wrapping r8 and r9.
     */
    private final ScratchRegister[] scratchRegister = new ScratchRegister[]{new ScratchRegister(r8), new ScratchRegister(r9)};

    // Points to the next free scratch register. Incremented by getScratchRegister() and
    // decremented by ScratchRegister.close().
    private int nextFreeScratchRegister = 0;
  55 
    /**
     * Creates a macro assembler for the given target.
     *
     * @param target target description; passed unchanged to the {@link AArch64Assembler}
     *            constructor.
     */
    public AArch64MacroAssembler(TargetDescription target) {
        super(target);
    }
  59 
    /**
     * Handle for one of the scratch registers owned by the enclosing assembler. It implements
     * {@link AutoCloseable} so that it can be used in a try-with-resources statement; closing the
     * handle gives the register back by decrementing the enclosing assembler's
     * {@code nextFreeScratchRegister} counter.
     */
    public class ScratchRegister implements AutoCloseable {
        // The machine register wrapped by this handle.
        private final Register register;

        /**
         * @param register the register this handle stands for.
         */
        public ScratchRegister(Register register) {
            this.register = register;
        }

        /**
         * @return the register wrapped by this handle.
         */
        public Register getRegister() {
            return register;
        }

        /**
         * Releases one scratch register. Only the shared counter is decremented; there is no check
         * that this particular handle is the one that was acquired most recently.
         */
        @Override
        public void close() {
            assert nextFreeScratchRegister > 0 : "Close called too often";
            nextFreeScratchRegister--;
        }
    }
  77 
    /**
     * Hands out the next free scratch register and advances the free-register counter.
     * <p>
     * The counter is only moved back by {@link ScratchRegister#close()}, so every handle obtained
     * here has to be closed again. The index is not range-checked: asking for more registers than
     * the pool contains fails with an {@link ArrayIndexOutOfBoundsException}.
     *
     * @return handle for the next free scratch register.
     */
    public ScratchRegister getScratchRegister() {
        return scratchRegister[nextFreeScratchRegister++];
    }
  81 
  82     /**
  83      * Specifies what actions have to be taken to turn an arbitrary address of the form
  84      * {@code base + displacement [+ index [<< scale]]} into a valid AArch64Address.
  85      */
  86     public static class AddressGenerationPlan {
  87         public final WorkPlan workPlan;
  88         public final AArch64Address.AddressingMode addressingMode;
  89         public final boolean needsScratch;
  90 
  91         public enum WorkPlan {
  92             /**
  93              * Can be used as-is without extra work.
  94              */
  95             NO_WORK,
  96             /**
  97              * Add scaled displacement to index register.
  98              */
  99             ADD_TO_INDEX,
 100             /**
 101              * Add unscaled displacement to base register.
 102              */
 103             ADD_TO_BASE,
 104         }
 105 
 106         /**
 107          * @param workPlan Work necessary to generate a valid address.
 108          * @param addressingMode Addressing mode of generated address.
 109          * @param needsScratch True if generating address needs a scatch register, false otherwise.
 110          */
 111         public AddressGenerationPlan(WorkPlan workPlan, AArch64Address.AddressingMode addressingMode, boolean needsScratch) {
 112             this.workPlan = workPlan;
 113             this.addressingMode = addressingMode;
 114             this.needsScratch = needsScratch;
 115         }
 116     }
 117 
 118     /**
 119      * Generates an addressplan for an address of the form
 120      * {@code base + displacement [+ index [<< log2(transferSize)]]} with the index register and
 121      * scaling being optional.
 122      *
 123      * @param displacement an arbitrary displacement.
 124      * @param hasIndexRegister true if the address uses an index register, false otherwise. non null
 125      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 126      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 127      * @return AddressGenerationPlan that specifies the actions necessary to generate a valid
 128      *         AArch64Address for the given parameters.
 129      */
 130     public static AddressGenerationPlan generateAddressPlan(long displacement, boolean hasIndexRegister, int transferSize) {
 131         assert transferSize == 0 || transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 132         boolean indexScaled = transferSize != 0;
 133         int log2Scale = NumUtil.log2Ceil(transferSize);
 134         long scaledDisplacement = displacement >> log2Scale;
 135         boolean displacementScalable = indexScaled && (displacement & (transferSize - 1)) == 0;
 136         if (displacement == 0) {
 137             // register offset without any work beforehand.
 138             return new AddressGenerationPlan(NO_WORK, REGISTER_OFFSET, false);
 139         } else {
 140             if (hasIndexRegister) {
 141                 if (displacementScalable) {
 142                     boolean needsScratch = !isArithmeticImmediate(scaledDisplacement);
 143                     return new AddressGenerationPlan(ADD_TO_INDEX, REGISTER_OFFSET, needsScratch);
 144                 } else {
 145                     boolean needsScratch = !isArithmeticImmediate(displacement);
 146                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 147                 }
 148             } else {
 149                 if (NumUtil.isSignedNbit(9, displacement)) {
 150                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_UNSCALED, false);
 151                 } else if (displacementScalable && NumUtil.isUnsignedNbit(12, scaledDisplacement)) {
 152                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_SCALED, false);
 153                 } else {
 154                     boolean needsScratch = !isArithmeticImmediate(displacement);
 155                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 156                 }
 157             }
 158         }
 159     }
 160 
    /**
     * Returns an AArch64Address pointing to
     * {@code base + displacement + index << log2(transferSize)}.
     * <p>
     * The required work is determined by
     * {@link #generateAddressPlan(long, boolean, int)}. Depending on the plan this method emits
     * instructions that add the displacement to the index or base register before the address is
     * created. Passing the zero register as {@code index} means that there is no index register.
     *
     * @param base general purpose register. May not be null or the zero register.
     * @param displacement arbitrary displacement added to base.
     * @param index general purpose register. May not be null or the stack pointer.
     * @param signExtendIndex if true consider index register a word register that should be
     *            sign-extended before being added.
     * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
     *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
     * @param additionalReg additional register used either as a scratch register or as part of the
     *            final address, depending on whether allowOverwrite is true or not. May not be null
     *            or stackpointer.
     * @param allowOverwrite if true allows to change value of base or index register to generate
     *            address.
     * @return AArch64Address pointing to memory at
     *         {@code base + displacement + index << log2(transferSize)}.
     */
    public AArch64Address makeAddress(Register base, long displacement, Register index, boolean signExtendIndex, int transferSize, Register additionalReg, boolean allowOverwrite) {
        AddressGenerationPlan plan = generateAddressPlan(displacement, !index.equals(zr), transferSize);
        // Without permission to overwrite base/index, any extra work needs a real additional
        // register to hold the result.
        assert allowOverwrite || !zr.equals(additionalReg) || plan.workPlan == NO_WORK;
        // A scratch register is needed whenever the displacement is not an arithmetic immediate.
        assert !plan.needsScratch || !zr.equals(additionalReg);
        int log2Scale = NumUtil.log2Ceil(transferSize);
        long scaledDisplacement = displacement >> log2Scale;
        Register newIndex = index;
        Register newBase = base;
        int immediate;
        switch (plan.workPlan) {
            case NO_WORK:
                // The displacement (if any) is encoded directly in the address.
                if (plan.addressingMode == IMMEDIATE_SCALED) {
                    immediate = (int) scaledDisplacement;
                } else {
                    immediate = (int) displacement;
                }
                break;
            case ADD_TO_INDEX:
                // newIndex = index + scaledDisplacement; the result goes into index itself when
                // overwriting is allowed, otherwise into additionalReg.
                newIndex = allowOverwrite ? index : additionalReg;
                if (plan.needsScratch) {
                    // Materialize the displacement in additionalReg first, then add registers.
                    mov(additionalReg, scaledDisplacement);
                    add(signExtendIndex ? 32 : 64, newIndex, index, additionalReg);
                } else {
                    add(signExtendIndex ? 32 : 64, newIndex, index, (int) scaledDisplacement);
                }
                immediate = 0;
                break;
            case ADD_TO_BASE:
                // newBase = base + displacement; same register selection as for the index case.
                newBase = allowOverwrite ? base : additionalReg;
                if (plan.needsScratch) {
                    mov(additionalReg, displacement);
                    add(64, newBase, base, additionalReg);
                } else {
                    add(64, newBase, base, (int) displacement);
                }
                immediate = 0;
                break;
            default:
                throw GraalError.shouldNotReachHere();
        }
        AArch64Address.AddressingMode addressingMode = plan.addressingMode;
        ExtendType extendType = null;
        if (addressingMode == REGISTER_OFFSET) {
            if (newIndex.equals(zr)) {
                // No index register left: the address is just the (possibly adjusted) base.
                addressingMode = BASE_REGISTER_ONLY;
            } else if (signExtendIndex) {
                // The index is a 32-bit value that has to be sign-extended when it is added.
                addressingMode = EXTENDED_REGISTER_OFFSET;
                extendType = ExtendType.SXTW;
            }
        }
        return AArch64Address.createAddress(addressingMode, newBase, newIndex, immediate, transferSize != 0, extendType);
    }
 232 
    /**
     * Returns an AArch64Address pointing to {@code base + displacement}. Specifies the memory
     * transfer size to allow some optimizations when building the address.
     * <p>
     * Delegates to the seven-argument {@code makeAddress} with the zero register as index and
     * without sign extension. Note that {@code additionalReg} comes before {@code transferSize}
     * in this overload's parameter list.
     *
     * @param base general purpose register. May not be null or the zero register.
     * @param displacement arbitrary displacement added to base.
     * @param additionalReg additional register used either as a scratch register or as part of the
     *            final address, depending on whether allowOverwrite is true or not. May not be
     *            null, zero register or stackpointer.
     * @param transferSize the memory transfer size in bytes.
     * @param allowOverwrite if true allows to change value of base or index register to generate
     *            address.
     * @return AArch64Address pointing to memory at {@code base + displacement}.
     */
    public AArch64Address makeAddress(Register base, long displacement, Register additionalReg, int transferSize, boolean allowOverwrite) {
        assert additionalReg.getRegisterCategory().equals(CPU);
        return makeAddress(base, displacement, zr, /* sign-extend */false, transferSize, additionalReg, allowOverwrite);
    }
 251 
    /**
     * Returns an AArch64Address pointing to {@code base + displacement}. Fails if address cannot be
     * represented without overwriting base register or using a scratch register.
     * <p>
     * Delegates to the seven-argument {@code makeAddress} with the zero register as both index and
     * additional register and with overwriting disabled. In that configuration the delegate's
     * assertions only hold when the address plan requires no extra work.
     *
     * @param base general purpose register. May not be null or the zero register.
     * @param displacement arbitrary displacement added to base.
     * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
     *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
     * @return AArch64Address pointing to memory at {@code base + displacement}.
     */
    public AArch64Address makeAddress(Register base, long displacement, int transferSize) {
        return makeAddress(base, displacement, zr, /* signExtend */false, transferSize, zr, /* allowOverwrite */false);
    }
 265 
 266     /**
 267      * Loads memory address into register.
 268      *
 269      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 270      * @param address address whose value is loaded into dst. May not be null,
 271      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_POST_INDEXED
 272      *            POST_INDEXED} or
 273      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_PRE_INDEXED
 274      *            IMMEDIATE_PRE_INDEXED}
 275      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 276      *            the index register is scaled. Can be 1, 2, 4 or 8.
 277      */
 278     public void loadAddress(Register dst, AArch64Address address, int transferSize) {
 279         assert transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 280         assert dst.getRegisterCategory().equals(CPU);
 281         int shiftAmt = NumUtil.log2Ceil(transferSize);
 282         switch (address.getAddressingMode()) {
 283             case IMMEDIATE_SCALED:
 284                 int scaledImmediate = address.getImmediateRaw() << shiftAmt;
 285                 int lowerBits = scaledImmediate & NumUtil.getNbitNumberInt(12);
 286                 int higherBits = scaledImmediate & ~NumUtil.getNbitNumberInt(12);
 287                 boolean firstAdd = true;
 288                 if (lowerBits != 0) {
 289                     add(64, dst, address.getBase(), lowerBits);
 290                     firstAdd = false;
 291                 }
 292                 if (higherBits != 0) {
 293                     Register src = firstAdd ? address.getBase() : dst;
 294                     add(64, dst, src, higherBits);
 295                 }
 296                 break;
 297             case IMMEDIATE_UNSCALED:
 298                 int immediate = address.getImmediateRaw();
 299                 add(64, dst, address.getBase(), immediate);
 300                 break;
 301             case REGISTER_OFFSET:
 302                 add(64, dst, address.getBase(), address.getOffset(), ShiftType.LSL, address.isScaled() ? shiftAmt : 0);
 303                 break;
 304             case EXTENDED_REGISTER_OFFSET:
 305                 add(64, dst, address.getBase(), address.getOffset(), address.getExtendType(), address.isScaled() ? shiftAmt : 0);
 306                 break;
 307             case PC_LITERAL:
 308                 super.adr(dst, address.getImmediateRaw());
 309                 break;
 310             case BASE_REGISTER_ONLY:
 311                 movx(dst, address.getBase());
 312                 break;
 313             default:
 314                 throw GraalError.shouldNotReachHere();
 315         }
 316     }
 317 
    /**
     * 64-bit register move: dst = src. Shorthand for {@code mov(64, dst, src)}.
     *
     * @param dst destination register.
     * @param src source register.
     */
    public void movx(Register dst, Register src) {
        mov(64, dst, src);
    }
 321 
 322     public void mov(int size, Register dst, Register src) {
 323         if (dst.equals(sp) || src.equals(sp)) {
 324             add(size, dst, src, 0);
 325         } else {
 326             or(size, dst, zr, src);
 327         }
 328     }
 329 
 330     /**
 331      * Generates a 64-bit immediate move code sequence.
 332      *
 333      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 334      * @param imm
 335      */
 336     private void mov64(Register dst, long imm) {
 337         // We have to move all non zero parts of the immediate in 16-bit chunks
 338         boolean firstMove = true;
 339         for (int offset = 0; offset < 64; offset += 16) {
 340             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 341             if (chunk == 0) {
 342                 continue;
 343             }
 344             if (firstMove) {
 345                 movz(64, dst, chunk, offset);
 346                 firstMove = false;
 347             } else {
 348                 movk(64, dst, chunk, offset);
 349             }
 350         }
 351         assert !firstMove;
 352     }
 353 
 354     /**
 355      * Loads immediate into register.
 356      *
 357      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 358      * @param imm immediate loaded into register.
 359      */
 360     public void mov(Register dst, long imm) {
 361         assert dst.getRegisterCategory().equals(CPU);
 362         if (imm == 0L) {
 363             movx(dst, zr);
 364         } else if (LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 365             or(64, dst, zr, imm);
 366         } else if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
 367             // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can
 368             // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and
 369             // then sign extend
 370             // them. This allows us to cover immediates like ~1L with 2 instructions.
 371             mov(dst, (int) imm);
 372             sxt(64, 32, dst, dst);
 373         } else {
 374             mov64(dst, imm);
 375         }
 376     }
 377 
    /**
     * Loads immediate into register.
     * <p>
     * The int is masked with {@code 0xFFFF_FFFFL}, i.e. zero-extended to 64 bits, and then passed
     * to {@link #mov(Register, long)}.
     *
     * @param dst general purpose register. May not be null, zero-register or stackpointer.
     * @param imm immediate loaded into register.
     */
    public void mov(Register dst, int imm) {
        mov(dst, imm & 0xFFFF_FFFFL);
    }
 387 
 388     /**
 389      * Generates a 48-bit immediate move code sequence. The immediate may later be updated by
 390      * HotSpot.
 391      *
 392      * In AArch64 mode the virtual address space is 48-bits in size, so we only need three
 393      * instructions to create a patchable instruction sequence that can reach anywhere.
 394      *
 395      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 396      * @param imm
 397      */
 398     public void movNativeAddress(Register dst, long imm) {
 399         assert (imm & 0xFFFF_0000_0000_0000L) == 0;
 400         // We have to move all non zero parts of the immediate in 16-bit chunks
 401         boolean firstMove = true;
 402         for (int offset = 0; offset < 48; offset += 16) {
 403             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 404             if (firstMove) {
 405                 movz(64, dst, chunk, offset);
 406                 firstMove = false;
 407             } else {
 408                 movk(64, dst, chunk, offset);
 409             }
 410         }
 411         assert !firstMove;
 412     }
 413 
 414     /**
 415      * @return Number of instructions necessary to load immediate into register.
 416      */
 417     public static int nrInstructionsToMoveImmediate(long imm) {
 418         if (imm == 0L || LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 419             return 1;
 420         }
 421         if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
 422             // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can
 423             // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and
 424             // then sign extend
 425             // them. This allows us to cover immediates like ~1L with 2 instructions.
 426             return 2;
 427         }
 428         int nrInstructions = 0;
 429         for (int offset = 0; offset < 64; offset += 16) {
 430             int part = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 431             if (part != 0) {
 432                 nrInstructions++;
 433             }
 434         }
 435         return nrInstructions;
 436     }
 437 
 438     /**
 439      * Loads a srcSize value from address into rt sign-extending it if necessary.
 440      *
 441      * @param targetSize size of target register in bits. Must be 32 or 64.
 442      * @param srcSize size of memory read in bits. Must be 8, 16 or 32 and smaller or equal to
 443      *            targetSize.
 444      * @param rt general purpose register. May not be null or stackpointer.
 445      * @param address all addressing modes allowed. May not be null.
 446      */
 447     @Override
 448     public void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
 449         assert targetSize == 32 || targetSize == 64;
 450         assert srcSize <= targetSize;
 451         if (targetSize == srcSize) {
 452             super.ldr(srcSize, rt, address);
 453         } else {
 454             super.ldrs(targetSize, srcSize, rt, address);
 455         }
 456     }
 457 
    /**
     * Conditional move. result = trueValue if cond else falseValue. Emitted as a csel instruction.
     *
     * @param size register size. Has to be 32 or 64.
     * @param result general purpose register. May not be null or the stackpointer.
     * @param trueValue general purpose register. May not be null or the stackpointer.
     * @param falseValue general purpose register. May not be null or the stackpointer.
     * @param cond any condition flag. May not be null.
     */
    public void cmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag cond) {
        super.csel(size, result, trueValue, falseValue, cond);
    }
 470 
    /**
     * Conditional set. dst = 1 if condition else 0.
     * <p>
     * Emitted as a 32-bit csinc of the zero register with itself under the negated condition.
     *
     * @param dst general purpose register. May not be null or stackpointer.
     * @param condition any condition. May not be null.
     */
    public void cset(Register dst, ConditionFlag condition) {
        super.csinc(32, dst, zr, zr, condition.negate());
    }
 480 
    /**
     * dst = src1 + src2.
     * <p>
     * Emitted as the shifted-register add with an LSL shift of 0.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     */
    public void add(int size, Register dst, Register src1, Register src2) {
        super.add(size, dst, src1, src2, ShiftType.LSL, 0);
    }
 492 
    /**
     * dst = src1 + src2 and sets condition flags.
     * <p>
     * Emitted as the extended-register adds, using the no-op extend type for {@code size} and a
     * shift of 0.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     */
    public void adds(int size, Register dst, Register src1, Register src2) {
        super.adds(size, dst, src1, src2, getNopExtendType(size), 0);
    }
 504 
    /**
     * dst = src1 - src2 and sets condition flags.
     * <p>
     * Emitted as the extended-register subs, using the no-op extend type for {@code size} and a
     * shift of 0.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     */
    public void subs(int size, Register dst, Register src1, Register src2) {
        super.subs(size, dst, src1, src2, getNopExtendType(size), 0);
    }
 516 
 517     /**
 518      * Returns the ExtendType for the given size that corresponds to a no-op.
 519      *
 520      * I.e. when doing add X0, X1, X2, the actual instruction has the form add X0, X1, X2 UXTX.
 521      *
 522      * @param size
 523      */
 524     private static ExtendType getNopExtendType(int size) {
 525         if (size == 64) {
 526             return ExtendType.UXTX;
 527         } else if (size == 32) {
 528             return ExtendType.UXTW;
 529         } else {
 530             throw GraalError.shouldNotReachHere("No-op ");
 531         }
 532     }
 533 
    /**
     * dst = src1 - src2.
     * <p>
     * Emitted as the shifted-register sub with an LSL shift of 0.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     */
    public void sub(int size, Register dst, Register src1, Register src2) {
        super.sub(size, dst, src1, src2, ShiftType.LSL, 0);
    }
 545 
 546     /**
 547      * dst = src1 + shiftType(src2, shiftAmt & (size - 1)).
 548      *
 549      * @param size register size. Has to be 32 or 64.
 550      * @param dst general purpose register. May not be null or stackpointer.
 551      * @param src1 general purpose register. May not be null or stackpointer.
 552      * @param src2 general purpose register. May not be null or stackpointer.
 553      * @param shiftType any type but ROR.
 554      * @param shiftAmt arbitrary shift amount.
 555      */
 556     @Override
 557     public void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 558         int shift = clampShiftAmt(size, shiftAmt);
 559         super.add(size, dst, src1, src2, shiftType, shift);
 560     }
 561 
 562     /**
 563      * dst = src1 + shiftType(src2, shiftAmt & (size-1)) and sets condition flags.
 564      *
 565      * @param size register size. Has to be 32 or 64.
 566      * @param dst general purpose register. May not be null or stackpointer.
 567      * @param src1 general purpose register. May not be null or stackpointer.
 568      * @param src2 general purpose register. May not be null or stackpointer.
 569      * @param shiftType any type but ROR.
 570      * @param shiftAmt arbitrary shift amount.
 571      */
 572     @Override
 573     public void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 574         int shift = clampShiftAmt(size, shiftAmt);
 575         super.sub(size, dst, src1, src2, shiftType, shift);
 576     }
 577 
    /**
     * dst = -src. Implemented as a subtraction of src from the zero register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src general purpose register. May not be null or stackpointer.
     */
    public void neg(int size, Register dst, Register src) {
        sub(size, dst, zr, src);
    }
 588 
 589     /**
 590      * dst = src + immediate.
 591      *
 592      * @param size register size. Has to be 32 or 64.
 593      * @param dst general purpose register. May not be null or stackpointer.
 594      * @param src general purpose register. May not be null or stackpointer.
 595      * @param immediate arithmetic immediate
 596      */
 597     @Override
 598     public void add(int size, Register dst, Register src, int immediate) {
 599         if (immediate < 0) {
 600             sub(size, dst, src, -immediate);
 601         } else if (!(dst.equals(src) && immediate == 0)) {
 602             super.add(size, dst, src, immediate);
 603         }
 604     }
 605 
 606     /**
 607      * dst = src + aimm and sets condition flags.
 608      *
 609      * @param size register size. Has to be 32 or 64.
 610      * @param dst general purpose register. May not be null or stackpointer.
 611      * @param src general purpose register. May not be null or zero-register.
 612      * @param immediate arithmetic immediate.
 613      */
 614     @Override
 615     public void adds(int size, Register dst, Register src, int immediate) {
 616         if (immediate < 0) {
 617             subs(size, dst, src, -immediate);
 618         } else if (!(dst.equals(src) && immediate == 0)) {
 619             super.adds(size, dst, src, immediate);
 620         }
 621     }
 622 
 623     /**
 624      * dst = src - immediate.
 625      *
 626      * @param size register size. Has to be 32 or 64.
 627      * @param dst general purpose register. May not be null or stackpointer.
 628      * @param src general purpose register. May not be null or stackpointer.
 629      * @param immediate arithmetic immediate
 630      */
 631     @Override
 632     public void sub(int size, Register dst, Register src, int immediate) {
 633         if (immediate < 0) {
 634             add(size, dst, src, -immediate);
 635         } else if (!dst.equals(src) || immediate != 0) {
 636             super.sub(size, dst, src, immediate);
 637         }
 638     }
 639 
 640     /**
 641      * dst = src - aimm and sets condition flags.
 642      *
 643      * @param size register size. Has to be 32 or 64.
 644      * @param dst general purpose register. May not be null or stackpointer.
 645      * @param src general purpose register. May not be null or zero-register.
 646      * @param immediate arithmetic immediate.
 647      */
 648     @Override
 649     public void subs(int size, Register dst, Register src, int immediate) {
 650         if (immediate < 0) {
 651             adds(size, dst, src, -immediate);
 652         } else if (!dst.equals(src) || immediate != 0) {
 653             super.sub(size, dst, src, immediate);
 654         }
 655     }
 656 
    /**
     * dst = src1 * src2.
     * <p>
     * Emitted as a multiply-add with the zero register as addend.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or the stackpointer.
     * @param src1 general purpose register. May not be null or the stackpointer.
     * @param src2 general purpose register. May not be null or the stackpointer.
     */
    public void mul(int size, Register dst, Register src1, Register src2) {
        super.madd(size, dst, src1, src2, zr);
    }
 668 
 669     /**
 670      * unsigned multiply high. dst = (src1 * src2) >> size
 671      *
 672      * @param size register size. Has to be 32 or 64.
 673      * @param dst general purpose register. May not be null or the stackpointer.
 674      * @param src1 general purpose register. May not be null or the stackpointer.
 675      * @param src2 general purpose register. May not be null or the stackpointer.
 676      */
 677     public void umulh(int size, Register dst, Register src1, Register src2) {
 678         assert size == 32 || size == 64;
 679         if (size == 64) {
 680             super.umulh(dst, src1, src2);
 681         } else {
 682             // xDst = wSrc1 * wSrc2
 683             super.umaddl(dst, src1, src2, zr);
 684             // xDst = xDst >> 32
 685             lshr(64, dst, dst, 32);
 686         }
 687     }
 688 
 689     /**
 690      * signed multiply high. dst = (src1 * src2) >> size
 691      *
 692      * @param size register size. Has to be 32 or 64.
 693      * @param dst general purpose register. May not be null or the stackpointer.
 694      * @param src1 general purpose register. May not be null or the stackpointer.
 695      * @param src2 general purpose register. May not be null or the stackpointer.
 696      */
 697     public void smulh(int size, Register dst, Register src1, Register src2) {
 698         assert size == 32 || size == 64;
 699         if (size == 64) {
 700             super.smulh(dst, src1, src2);
 701         } else {
 702             // xDst = wSrc1 * wSrc2
 703             super.smaddl(dst, src1, src2, zr);
 704             // xDst = xDst >> 32
 705             lshr(64, dst, dst, 32);
 706         }
 707     }
 708 
 709     /**
 710      * dst = src1 % src2. Signed.
 711      *
 712      * @param size register size. Has to be 32 or 64.
 713      * @param dst general purpose register. May not be null or the stackpointer.
 714      * @param n numerator. General purpose register. May not be null or the stackpointer.
 715      * @param d denominator. General purpose register. Divisor May not be null or the stackpointer.
 716      */
 717     public void rem(int size, Register dst, Register n, Register d) {
 718         // There is no irem or similar instruction. Instead we use the relation:
 719         // n % d = n - Floor(n / d) * d if nd >= 0
 720         // n % d = n - Ceil(n / d) * d else
 721         // Which is equivalent to n - TruncatingDivision(n, d) * d
 722         super.sdiv(size, dst, n, d);
 723         super.msub(size, dst, dst, d, n);
 724     }
 725 
 726     /**
 727      * dst = src1 % src2. Unsigned.
 728      *
 729      * @param size register size. Has to be 32 or 64.
 730      * @param dst general purpose register. May not be null or the stackpointer.
 731      * @param n numerator. General purpose register. May not be null or the stackpointer.
 732      * @param d denominator. General purpose register. Divisor May not be null or the stackpointer.
 733      */
 734     public void urem(int size, Register dst, Register n, Register d) {
 735         // There is no irem or similar instruction. Instead we use the relation:
 736         // n % d = n - Floor(n / d) * d
 737         // Which is equivalent to n - TruncatingDivision(n, d) * d
 738         super.udiv(size, dst, n, d);
 739         super.msub(size, dst, dst, d, n);
 740     }
 741 
 742     /**
 743      * Add/subtract instruction encoding supports 12-bit immediate values.
 744      *
 745      * @param imm immediate value to be tested.
 746      * @return true if immediate can be used directly for arithmetic instructions (add/sub), false
 747      *         otherwise.
 748      */
 749     public static boolean isArithmeticImmediate(long imm) {
 750         // If we have a negative immediate we just use the opposite operator. I.e.: x - (-5) == x +
 751         // 5.
 752         return NumUtil.isInt(Math.abs(imm)) && isAimm((int) Math.abs(imm));
 753     }
 754 
 755     /**
 756      * Compare instructions are add/subtract instructions and so support 12-bit immediate values.
 757      *
 758      * @param imm immediate value to be tested.
 759      * @return true if immediate can be used directly with comparison instructions, false otherwise.
 760      */
 761     public static boolean isComparisonImmediate(long imm) {
 762         return isArithmeticImmediate(imm);
 763     }
 764 
 765     /**
 766      * Move wide immediate instruction encoding supports 16-bit immediate values which can be
 767      * optionally-shifted by multiples of 16 (i.e. 0, 16, 32, 48).
 768      *
 769      * @return true if immediate can be moved directly into a register, false otherwise.
 770      */
 771     public static boolean isMovableImmediate(long imm) {
 772         // // Positions of first, respectively last set bit.
 773         // int start = Long.numberOfTrailingZeros(imm);
 774         // int end = 64 - Long.numberOfLeadingZeros(imm);
 775         // int length = end - start;
 776         // if (length > 16) {
 777         // return false;
 778         // }
 779         // // We can shift the necessary part of the immediate (i.e. everything between the first
 780         // and
 781         // // last set bit) by as much as 16 - length around to arrive at a valid shift amount
 782         // int tolerance = 16 - length;
 783         // int prevMultiple = NumUtil.roundDown(start, 16);
 784         // int nextMultiple = NumUtil.roundUp(start, 16);
 785         // return start - prevMultiple <= tolerance || nextMultiple - start <= tolerance;
 786         /*
 787          * This is a bit optimistic because the constant could also be for an arithmetic instruction
 788          * which only supports 12-bits. That case needs to be handled in the backend.
 789          */
 790         return NumUtil.isInt(Math.abs(imm)) && NumUtil.isUnsignedNbit(16, (int) Math.abs(imm));
 791     }
 792 
 793     /**
 794      * dst = src << (shiftAmt & (size - 1)).
 795      *
 796      * @param size register size. Has to be 32 or 64.
 797      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 798      * @param src general purpose register. May not be null, stackpointer or zero-register.
 799      * @param shiftAmt amount by which src is shifted.
 800      */
 801     public void shl(int size, Register dst, Register src, long shiftAmt) {
 802         int shift = clampShiftAmt(size, shiftAmt);
 803         super.ubfm(size, dst, src, (size - shift) & (size - 1), size - 1 - shift);
 804     }
 805 
 806     /**
 807      * dst = src1 << (src2 & (size - 1)).
 808      *
 809      * @param size register size. Has to be 32 or 64.
 810      * @param dst general purpose register. May not be null or stackpointer.
 811      * @param src general purpose register. May not be null or stackpointer.
 812      * @param shift general purpose register. May not be null or stackpointer.
 813      */
 814     public void shl(int size, Register dst, Register src, Register shift) {
 815         super.lsl(size, dst, src, shift);
 816     }
 817 
 818     /**
 819      * dst = src >>> (shiftAmt & (size - 1)).
 820      *
 821      * @param size register size. Has to be 32 or 64.
 822      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 823      * @param src general purpose register. May not be null, stackpointer or zero-register.
 824      * @param shiftAmt amount by which src is shifted.
 825      */
 826     public void lshr(int size, Register dst, Register src, long shiftAmt) {
 827         int shift = clampShiftAmt(size, shiftAmt);
 828         super.ubfm(size, dst, src, shift, size - 1);
 829     }
 830 
 831     /**
 832      * dst = src1 >>> (src2 & (size - 1)).
 833      *
 834      * @param size register size. Has to be 32 or 64.
 835      * @param dst general purpose register. May not be null or stackpointer.
 836      * @param src general purpose register. May not be null or stackpointer.
 837      * @param shift general purpose register. May not be null or stackpointer.
 838      */
 839     public void lshr(int size, Register dst, Register src, Register shift) {
 840         super.lsr(size, dst, src, shift);
 841     }
 842 
 843     /**
 844      * dst = src >> (shiftAmt & log2(size)).
 845      *
 846      * @param size register size. Has to be 32 or 64.
 847      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 848      * @param src general purpose register. May not be null, stackpointer or zero-register.
 849      * @param shiftAmt amount by which src is shifted.
 850      */
 851     public void ashr(int size, Register dst, Register src, long shiftAmt) {
 852         int shift = clampShiftAmt(size, shiftAmt);
 853         super.sbfm(size, dst, src, shift, size - 1);
 854     }
 855 
 856     /**
 857      * dst = src1 >> (src2 & log2(size)).
 858      *
 859      * @param size register size. Has to be 32 or 64.
 860      * @param dst general purpose register. May not be null or stackpointer.
 861      * @param src general purpose register. May not be null or stackpointer.
 862      * @param shift general purpose register. May not be null or stackpointer.
 863      */
 864     public void ashr(int size, Register dst, Register src, Register shift) {
 865         super.asr(size, dst, src, shift);
 866     }
 867 
 868     /**
 869      * Clamps shiftAmt into range 0 <= shiftamt < size according to JLS.
 870      *
 871      * @param size size of operation.
 872      * @param shiftAmt arbitrary shift amount.
 873      * @return value between 0 and size - 1 inclusive that is equivalent to shiftAmt according to
 874      *         JLS.
 875      */
 876     private static int clampShiftAmt(int size, long shiftAmt) {
 877         return (int) (shiftAmt & (size - 1));
 878     }
 879 
 880     /**
 881      * dst = src1 & src2.
 882      *
 883      * @param size register size. Has to be 32 or 64.
 884      * @param dst general purpose register. May not be null or stackpointer.
 885      * @param src1 general purpose register. May not be null or stackpointer.
 886      * @param src2 general purpose register. May not be null or stackpointer.
 887      */
 888     public void and(int size, Register dst, Register src1, Register src2) {
 889         super.and(size, dst, src1, src2, ShiftType.LSL, 0);
 890     }
 891 
 892     /**
 893      * dst = src1 ^ src2.
 894      *
 895      * @param size register size. Has to be 32 or 64.
 896      * @param dst general purpose register. May not be null or stackpointer.
 897      * @param src1 general purpose register. May not be null or stackpointer.
 898      * @param src2 general purpose register. May not be null or stackpointer.
 899      */
 900     public void eor(int size, Register dst, Register src1, Register src2) {
 901         super.eor(size, dst, src1, src2, ShiftType.LSL, 0);
 902     }
 903 
 904     /**
 905      * dst = src1 | src2.
 906      *
 907      * @param size register size. Has to be 32 or 64.
 908      * @param dst general purpose register. May not be null or stackpointer.
 909      * @param src1 general purpose register. May not be null or stackpointer.
 910      * @param src2 general purpose register. May not be null or stackpointer.
 911      */
 912     public void or(int size, Register dst, Register src1, Register src2) {
 913         super.orr(size, dst, src1, src2, ShiftType.LSL, 0);
 914     }
 915 
 916     /**
 917      * dst = src | bimm.
 918      *
 919      * @param size register size. Has to be 32 or 64.
 920      * @param dst general purpose register. May not be null or zero-register.
 921      * @param src general purpose register. May not be null or stack-pointer.
 922      * @param bimm logical immediate. See {@link AArch64Assembler.LogicalImmediateTable} for exact
 923      *            definition.
 924      */
 925     public void or(int size, Register dst, Register src, long bimm) {
 926         super.orr(size, dst, src, bimm);
 927     }
 928 
 929     /**
 930      * dst = ~src.
 931      *
 932      * @param size register size. Has to be 32 or 64.
 933      * @param dst general purpose register. May not be null or stackpointer.
 934      * @param src general purpose register. May not be null or stackpointer.
 935      */
 936     public void not(int size, Register dst, Register src) {
 937         super.orn(size, dst, zr, src, ShiftType.LSL, 0);
 938     }
 939 
 940     /**
 941      * Sign-extend value from src into dst.
 942      *
 943      * @param destSize destination register size. Has to be 32 or 64.
 944      * @param srcSize source register size. May be 8, 16 or 32 and smaller than destSize.
 945      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 946      * @param src general purpose register. May not be null, stackpointer or zero-register.
 947      */
 948     public void sxt(int destSize, int srcSize, Register dst, Register src) {
 949         assert (destSize == 32 || destSize == 64) && srcSize < destSize;
 950         assert srcSize == 8 || srcSize == 16 || srcSize == 32;
 951         int[] srcSizeValues = {7, 15, 31};
 952         super.sbfm(destSize, dst, src, 0, srcSizeValues[NumUtil.log2Ceil(srcSize / 8)]);
 953     }
 954 
 955     /**
 956      * dst = src if condition else -src.
 957      *
 958      * @param size register size. Must be 32 or 64.
 959      * @param dst general purpose register. May not be null or the stackpointer.
 960      * @param src general purpose register. May not be null or the stackpointer.
 961      * @param condition any condition except AV or NV. May not be null.
 962      */
 963     public void csneg(int size, Register dst, Register src, ConditionFlag condition) {
 964         super.csneg(size, dst, src, src, condition.negate());
 965     }
 966 
 967     /**
 968      * @return True if the immediate can be used directly for logical 64-bit instructions.
 969      */
 970     public static boolean isLogicalImmediate(long imm) {
 971         return LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO;
 972     }
 973 
 974     /**
 975      * @return True if the immediate can be used directly for logical 32-bit instructions.
 976      */
 977     public static boolean isLogicalImmediate(int imm) {
 978         return LogicalImmediateTable.isRepresentable(imm) == LogicalImmediateTable.Representable.YES;
 979     }
 980 
 981     /* Float instructions */
 982 
 983     /**
 984      * Moves integer to float, float to integer, or float to float. Does not support integer to
 985      * integer moves.
 986      *
 987      * @param size register size. Has to be 32 or 64.
 988      * @param dst Either floating-point or general-purpose register. If general-purpose register may
 989      *            not be stackpointer or zero register. Cannot be null in any case.
 990      * @param src Either floating-point or general-purpose register. If general-purpose register may
 991      *            not be stackpointer. Cannot be null in any case.
 992      */
 993     @Override
 994     public void fmov(int size, Register dst, Register src) {
 995         assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU)) : "src and dst cannot both be integer registers.";
 996         if (dst.getRegisterCategory().equals(CPU)) {
 997             super.fmovFpu2Cpu(size, dst, src);
 998         } else if (src.getRegisterCategory().equals(CPU)) {
 999             super.fmovCpu2Fpu(size, dst, src);
1000         } else {
1001             super.fmov(size, dst, src);
1002         }
1003     }
1004 
1005     /**
1006      *
1007      * @param size register size. Has to be 32 or 64.
1008      * @param dst floating point register. May not be null.
1009      * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be
1010      *            loaded, i.e. (float) imm == imm must be true. In all cases
1011      *            {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true
1012      *            depending on size.
1013      */
1014     @Override
1015     public void fmov(int size, Register dst, double imm) {
1016         if (imm == 0.0) {
1017             assert Double.doubleToRawLongBits(imm) == 0L : "-0.0 is no valid immediate.";
1018             super.fmovCpu2Fpu(size, dst, zr);
1019         } else {
1020             super.fmov(size, dst, imm);
1021         }
1022     }
1023 
1024     /**
1025      *
1026      * @return true if immediate can be loaded directly into floating-point register, false
1027      *         otherwise.
1028      */
1029     public static boolean isDoubleImmediate(double imm) {
1030         return Double.doubleToRawLongBits(imm) == 0L || AArch64Assembler.isDoubleImmediate(imm);
1031     }
1032 
1033     /**
1034      *
1035      * @return true if immediate can be loaded directly into floating-point register, false
1036      *         otherwise.
1037      */
1038     public static boolean isFloatImmediate(float imm) {
1039         return Float.floatToRawIntBits(imm) == 0 || AArch64Assembler.isFloatImmediate(imm);
1040     }
1041 
1042     /**
1043      * Conditional move. dst = src1 if condition else src2.
1044      *
1045      * @param size register size.
1046      * @param result floating point register. May not be null.
1047      * @param trueValue floating point register. May not be null.
1048      * @param falseValue floating point register. May not be null.
1049      * @param condition every condition allowed. May not be null.
1050      */
1051     public void fcmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag condition) {
1052         super.fcsel(size, result, trueValue, falseValue, condition);
1053     }
1054 
1055     /**
1056      * dst = src1 % src2.
1057      *
1058      * @param size register size. Has to be 32 or 64.
1059      * @param dst floating-point register. May not be null.
1060      * @param n numerator. Floating-point register. May not be null.
1061      * @param d denominator. Floating-point register. May not be null.
1062      */
1063     public void frem(int size, Register dst, Register n, Register d) {
1064         // There is no frem instruction, instead we compute the remainder using the relation:
1065         // rem = n - Truncating(n / d) * d
1066         super.fdiv(size, dst, n, d);
1067         super.frintz(size, dst, dst);
1068         super.fmsub(size, dst, dst, d, n);
1069     }
1070 
1071     /* Branches */
1072 
1073     /**
1074      * Compares x and y and sets condition flags.
1075      *
1076      * @param size register size. Has to be 32 or 64.
1077      * @param x general purpose register. May not be null or stackpointer.
1078      * @param y general purpose register. May not be null or stackpointer.
1079      */
1080     public void cmp(int size, Register x, Register y) {
1081         super.subs(size, zr, x, y, ShiftType.LSL, 0);
1082     }
1083 
1084     /**
1085      * Compares x to y and sets condition flags.
1086      *
1087      * @param size register size. Has to be 32 or 64.
1088      * @param x general purpose register. May not be null or stackpointer.
1089      * @param y comparison immediate, {@link #isComparisonImmediate(long)} has to be true for it.
1090      */
1091     public void cmp(int size, Register x, int y) {
1092         if (y < 0) {
1093             super.adds(size, zr, x, -y);
1094         } else {
1095             super.subs(size, zr, x, y);
1096         }
1097     }
1098 
1099     /**
1100      * Sets condition flags according to result of x & y.
1101      *
1102      * @param size register size. Has to be 32 or 64.
1103      * @param dst general purpose register. May not be null or stack-pointer.
1104      * @param x general purpose register. May not be null or stackpointer.
1105      * @param y general purpose register. May not be null or stackpointer.
1106      */
1107     public void ands(int size, Register dst, Register x, Register y) {
1108         super.ands(size, dst, x, y, ShiftType.LSL, 0);
1109     }
1110 
1111     /**
1112      * When patching up Labels we have to know what kind of code to generate.
1113      */
1114     public enum PatchLabelKind {
1115         BRANCH_CONDITIONALLY(0x0),
1116         BRANCH_UNCONDITIONALLY(0x1),
1117         BRANCH_NONZERO(0x2),
1118         BRANCH_ZERO(0x3),
1119         JUMP_ADDRESS(0x4),
1120         ADR(0x5);
1121 
1122         /**
1123          * Offset by which additional information for branch conditionally, branch zero and branch
1124          * non zero has to be shifted.
1125          */
1126         public static final int INFORMATION_OFFSET = 5;
1127 
1128         public final int encoding;
1129 
1130         PatchLabelKind(int encoding) {
1131             this.encoding = encoding;
1132         }
1133 
1134         /**
1135          * @return PatchLabelKind with given encoding.
1136          */
1137         private static PatchLabelKind fromEncoding(int encoding) {
1138             return values()[encoding & NumUtil.getNbitNumberInt(INFORMATION_OFFSET)];
1139         }
1140 
1141     }
1142 
1143     public void adr(Register dst, Label label) {
1144         // TODO Handle case where offset is too large for a single jump instruction
1145         if (label.isBound()) {
1146             int offset = label.position() - position();
1147             super.adr(dst, offset);
1148         } else {
1149             label.addPatchAt(position());
1150             // Encode condition flag so that we know how to patch the instruction later
1151             emitInt(PatchLabelKind.ADR.encoding | dst.encoding << PatchLabelKind.INFORMATION_OFFSET);
1152         }
1153     }
1154 
1155     /**
1156      * Compare register and branch if non-zero.
1157      *
1158      * @param size Instruction size in bits. Should be either 32 or 64.
1159      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1160      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1161      */
1162     public void cbnz(int size, Register cmp, Label label) {
1163         // TODO Handle case where offset is too large for a single jump instruction
1164         if (label.isBound()) {
1165             int offset = label.position() - position();
1166             super.cbnz(size, cmp, offset);
1167         } else {
1168             label.addPatchAt(position());
1169             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1170             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1171             // Encode condition flag so that we know how to patch the instruction later
1172             emitInt(PatchLabelKind.BRANCH_NONZERO.encoding | regEncoding | sizeEncoding);
1173         }
1174     }
1175 
1176     /**
1177      * Compare register and branch if zero.
1178      *
1179      * @param size Instruction size in bits. Should be either 32 or 64.
1180      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1181      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1182      */
1183     public void cbz(int size, Register cmp, Label label) {
1184         // TODO Handle case where offset is too large for a single jump instruction
1185         if (label.isBound()) {
1186             int offset = label.position() - position();
1187             super.cbz(size, cmp, offset);
1188         } else {
1189             label.addPatchAt(position());
1190             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1191             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1192             // Encode condition flag so that we know how to patch the instruction later
1193             emitInt(PatchLabelKind.BRANCH_ZERO.encoding | regEncoding | sizeEncoding);
1194         }
1195     }
1196 
1197     /**
1198      * Branches to label if condition is true.
1199      *
1200      * @param condition any condition value allowed. Non null.
1201      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1202      */
1203     public void branchConditionally(ConditionFlag condition, Label label) {
1204         // TODO Handle case where offset is too large for a single jump instruction
1205         if (label.isBound()) {
1206             int offset = label.position() - position();
1207             super.b(condition, offset);
1208         } else {
1209             label.addPatchAt(position());
1210             // Encode condition flag so that we know how to patch the instruction later
1211             emitInt(PatchLabelKind.BRANCH_CONDITIONALLY.encoding | condition.encoding << PatchLabelKind.INFORMATION_OFFSET);
1212         }
1213     }
1214 
1215     /**
1216      * Branches if condition is true. Address of jump is patched up by HotSpot c++ code.
1217      *
1218      * @param condition any condition value allowed. Non null.
1219      */
1220     public void branchConditionally(ConditionFlag condition) {
1221         // Correct offset is fixed up by HotSpot later.
1222         super.b(condition, 0);
1223     }
1224 
1225     /**
1226      * Jumps to label.
1227      *
1228      * param label Can only handle signed 28-bit offsets. May be unbound. Non null.
1229      */
1230     @Override
1231     public void jmp(Label label) {
1232         // TODO Handle case where offset is too large for a single jump instruction
1233         if (label.isBound()) {
1234             int offset = label.position() - position();
1235             super.b(offset);
1236         } else {
1237             label.addPatchAt(position());
1238             emitInt(PatchLabelKind.BRANCH_UNCONDITIONALLY.encoding);
1239         }
1240     }
1241 
1242     /**
1243      * Jump to address in dest.
1244      *
1245      * @param dest General purpose register. May not be null, zero-register or stackpointer.
1246      */
1247     public void jmp(Register dest) {
1248         super.br(dest);
1249     }
1250 
1251     /**
1252      * Immediate jump instruction fixed up by HotSpot c++ code.
1253      */
1254     public void jmp() {
1255         // Offset has to be fixed up by c++ code.
1256         super.b(0);
1257     }
1258 
1259     /**
1260      *
1261      * @return true if immediate offset can be used in a single branch instruction.
1262      */
1263     public static boolean isBranchImmediateOffset(long imm) {
1264         return NumUtil.isSignedNbit(28, imm);
1265     }
1266 
1267     /* system instructions */
1268 
1269     /**
1270      * Exception codes used when calling hlt instruction.
1271      */
1272     public enum AArch64ExceptionCode {
1273         NO_SWITCH_TARGET(0x0),
1274         BREAKPOINT(0x1);
1275 
1276         public final int encoding;
1277 
1278         AArch64ExceptionCode(int encoding) {
1279             this.encoding = encoding;
1280         }
1281     }
1282 
1283     /**
1284      * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
1285      * UNALLOCATED instruction.
1286      *
1287      * @param exceptionCode exception code specifying why halt was called. Non null.
1288      */
1289     public void hlt(AArch64ExceptionCode exceptionCode) {
1290         super.hlt(exceptionCode.encoding);
1291     }
1292 
1293     /**
1294      * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
1295      * exception level.
1296      *
1297      * @param exceptionCode exception code specifying why break was called. Non null.
1298      */
1299     public void brk(AArch64ExceptionCode exceptionCode) {
1300         super.brk(exceptionCode.encoding);
1301     }
1302 
1303     public void pause() {
1304         throw GraalError.unimplemented();
1305     }
1306 
1307     /**
1308      * Executes no-op instruction. No registers or flags are updated, except for PC.
1309      */
1310     public void nop() {
1311         super.hint(SystemHint.NOP);
1312     }
1313 
1314     /**
1315      * Same as {@link #nop()}.
1316      */
1317     @Override
1318     public void ensureUniquePC() {
1319         nop();
1320     }
1321 
1322     /**
1323      * Aligns PC.
1324      *
1325      * @param modulus Has to be positive multiple of 4.
1326      */
1327     @Override
1328     public void align(int modulus) {
1329         assert modulus > 0 && (modulus & 0x3) == 0 : "Modulus has to be a positive multiple of 4.";
1330         if (position() % modulus == 0) {
1331             return;
1332         }
1333         int offset = modulus - position() % modulus;
1334         for (int i = 0; i < offset; i += 4) {
1335             nop();
1336         }
1337     }
1338 
1339     /**
1340      * Patches jump targets when label gets bound.
1341      */
1342     @Override
1343     protected void patchJumpTarget(int branch, int jumpTarget) {
1344         int instruction = getInt(branch);
1345         int branchOffset = jumpTarget - branch;
1346         PatchLabelKind type = PatchLabelKind.fromEncoding(instruction);
1347         switch (type) {
1348             case BRANCH_CONDITIONALLY:
1349                 ConditionFlag cf = ConditionFlag.fromEncoding(instruction >>> PatchLabelKind.INFORMATION_OFFSET);
1350                 super.b(cf, branchOffset, branch);
1351                 break;
1352             case BRANCH_UNCONDITIONALLY:
1353                 super.b(branchOffset, branch);
1354                 break;
1355             case JUMP_ADDRESS:
1356                 emitInt(jumpTarget, branch);
1357                 break;
1358             case BRANCH_NONZERO:
1359             case BRANCH_ZERO: {
1360                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1361                 int sizeEncoding = information & 1;
1362                 int regEncoding = information >>> 1;
1363                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1364                 // 1 => 64; 0 => 32
1365                 int size = sizeEncoding * 32 + 32;
1366                 switch (type) {
1367                     case BRANCH_NONZERO:
1368                         super.cbnz(size, reg, branchOffset, branch);
1369                         break;
1370                     case BRANCH_ZERO:
1371                         super.cbz(size, reg, branchOffset, branch);
1372                         break;
1373                 }
1374                 break;
1375             }
1376             case ADR: {
1377                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1378                 int regEncoding = information;
1379                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1380                 super.adr(reg, branchOffset, branch);
1381                 break;
1382             }
1383             default:
1384                 throw GraalError.shouldNotReachHere();
1385         }
1386     }
1387 
1388     /**
1389      * Generates an address of the form {@code base + displacement}.
1390      *
1391      * Does not change base register to fulfill this requirement. Will fail if displacement cannot
1392      * be represented directly as address.
1393      *
1394      * @param base general purpose register. May not be null or the zero register.
1395      * @param displacement arbitrary displacement added to base.
1396      * @return AArch64Address referencing memory at {@code base + displacement}.
1397      */
1398     @Override
1399     public AArch64Address makeAddress(Register base, int displacement) {
1400         return makeAddress(base, displacement, zr, /* signExtend */false, /* transferSize */0, zr, /* allowOverwrite */false);
1401     }
1402 
1403     @Override
1404     public AbstractAddress getPlaceholder(int instructionStartPosition) {
1405         return AArch64Address.PLACEHOLDER;
1406     }
1407 }