1 /*
   2  * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.graalvm.compiler.asm.aarch64;
  25 
  26 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.BASE_REGISTER_ONLY;
  27 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.EXTENDED_REGISTER_OFFSET;
  28 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SCALED;
  29 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_UNSCALED;
  30 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.REGISTER_OFFSET;
  31 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_BASE;
  32 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_INDEX;
  33 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.NO_WORK;
  34 import static jdk.vm.ci.aarch64.AArch64.CPU;
  35 import static jdk.vm.ci.aarch64.AArch64.r8;
  36 import static jdk.vm.ci.aarch64.AArch64.r9;
  37 import static jdk.vm.ci.aarch64.AArch64.sp;
  38 import static jdk.vm.ci.aarch64.AArch64.zr;
  39 
  40 import org.graalvm.compiler.asm.AbstractAddress;
  41 import org.graalvm.compiler.asm.Label;
  42 import org.graalvm.compiler.core.common.NumUtil;
  43 import org.graalvm.compiler.debug.GraalError;
  44 
  45 import jdk.vm.ci.aarch64.AArch64;
  46 import jdk.vm.ci.code.Register;
  47 import jdk.vm.ci.code.TargetDescription;
  48 
  49 public class AArch64MacroAssembler extends AArch64Assembler {
  50 
    /**
     * The scratch registers handed out by {@link #getScratchRegister()}: {@code r8} first, then
     * {@code r9}.
     */
    private final ScratchRegister[] scratchRegister = new ScratchRegister[]{new ScratchRegister(r8), new ScratchRegister(r9)};

    // Index into scratchRegister of the next free scratch register. Incremented by
    // getScratchRegister() and decremented by ScratchRegister.close().
    private int nextFreeScratchRegister = 0;
  55 
    /**
     * Creates a macro assembler for the given target.
     *
     * @param target target description, handed unchanged to the {@link AArch64Assembler}
     *            constructor.
     */
    public AArch64MacroAssembler(TargetDescription target) {
        super(target);
    }
  59 
    /**
     * Handle for a scratch register obtained from {@link #getScratchRegister()}. The handle is
     * {@link AutoCloseable} so that it can be released with a try-with-resources statement.
     */
    public class ScratchRegister implements AutoCloseable {
        private final Register register;

        /**
         * @param register the register wrapped by this handle.
         */
        public ScratchRegister(Register register) {
            this.register = register;
        }

        /**
         * @return the register wrapped by this handle.
         */
        public Register getRegister() {
            return register;
        }

        /**
         * Releases one scratch register by decrementing the enclosing assembler's next-free
         * counter. The counter is decremented no matter which handle is closed, so handles are
         * presumably expected to be closed in reverse order of acquisition.
         */
        @Override
        public void close() {
            assert nextFreeScratchRegister > 0 : "Close called too often";
            nextFreeScratchRegister--;
        }
    }
  77 
  78     public ScratchRegister getScratchRegister() {
  79         return scratchRegister[nextFreeScratchRegister++];
  80     }
  81 
    /**
     * Specifies what actions have to be taken to turn an arbitrary address of the form
     * {@code base + displacement [+ index [<< scale]]} into a valid AArch64Address.
     */
    public static class AddressGenerationPlan {
        /** Work that has to be done before the address can be used. */
        public final WorkPlan workPlan;
        /** Addressing mode of the address once the work has been done. */
        public final AArch64Address.AddressingMode addressingMode;
        /** True if carrying out the work plan requires a scratch register. */
        public final boolean needsScratch;

        public enum WorkPlan {
            /**
             * Can be used as-is without extra work.
             */
            NO_WORK,
            /**
             * Add scaled displacement to index register.
             */
            ADD_TO_INDEX,
            /**
             * Add unscaled displacement to base register.
             */
            ADD_TO_BASE,
        }

        /**
         * @param workPlan Work necessary to generate a valid address.
         * @param addressingMode Addressing mode of generated address.
         * @param needsScratch True if generating address needs a scratch register, false otherwise.
         */
        public AddressGenerationPlan(WorkPlan workPlan, AArch64Address.AddressingMode addressingMode, boolean needsScratch) {
            this.workPlan = workPlan;
            this.addressingMode = addressingMode;
            this.needsScratch = needsScratch;
        }
    }
 117 
 118     /**
 119      * Generates an addressplan for an address of the form
 120      * {@code base + displacement [+ index [<< log2(transferSize)]]} with the index register and
 121      * scaling being optional.
 122      *
 123      * @param displacement an arbitrary displacement.
 124      * @param hasIndexRegister true if the address uses an index register, false otherwise. non null
 125      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 126      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 127      * @return AddressGenerationPlan that specifies the actions necessary to generate a valid
 128      *         AArch64Address for the given parameters.
 129      */
 130     public static AddressGenerationPlan generateAddressPlan(long displacement, boolean hasIndexRegister, int transferSize) {
 131         assert transferSize == 0 || transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 132         boolean indexScaled = transferSize != 0;
 133         int log2Scale = NumUtil.log2Ceil(transferSize);
 134         long scaledDisplacement = displacement >> log2Scale;
 135         boolean displacementScalable = indexScaled && (displacement & (transferSize - 1)) == 0;
 136         if (displacement == 0) {
 137             // register offset without any work beforehand.
 138             return new AddressGenerationPlan(NO_WORK, REGISTER_OFFSET, false);
 139         } else {
 140             if (hasIndexRegister) {
 141                 if (displacementScalable) {
 142                     boolean needsScratch = !isArithmeticImmediate(scaledDisplacement);
 143                     return new AddressGenerationPlan(ADD_TO_INDEX, REGISTER_OFFSET, needsScratch);
 144                 } else {
 145                     boolean needsScratch = !isArithmeticImmediate(displacement);
 146                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 147                 }
 148             } else {
 149                 if (NumUtil.isSignedNbit(9, displacement)) {
 150                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_UNSCALED, false);
 151                 } else if (displacementScalable && NumUtil.isUnsignedNbit(12, scaledDisplacement)) {
 152                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_SCALED, false);
 153                 } else {
 154                     boolean needsScratch = !isArithmeticImmediate(displacement);
 155                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 156                 }
 157             }
 158         }
 159     }
 160 
 161     /**
 162      * Returns an AArch64Address pointing to
 163      * {@code base + displacement + index << log2(transferSize)}.
 164      *
 165      * @param base general purpose register. May not be null or the zero register.
 166      * @param displacement arbitrary displacement added to base.
 167      * @param index general purpose register. May not be null or the stack pointer.
 168      * @param signExtendIndex if true consider index register a word register that should be
 169      *            sign-extended before being added.
 170      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 171      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 172      * @param additionalReg additional register used either as a scratch register or as part of the
 173      *            final address, depending on whether allowOverwrite is true or not. May not be null
 174      *            or stackpointer.
 175      * @param allowOverwrite if true allows to change value of base or index register to generate
 176      *            address.
 177      * @return AArch64Address pointing to memory at
 178      *         {@code base + displacement + index << log2(transferSize)}.
 179      */
 180     public AArch64Address makeAddress(Register base, long displacement, Register index, boolean signExtendIndex, int transferSize, Register additionalReg, boolean allowOverwrite) {
 181         AddressGenerationPlan plan = generateAddressPlan(displacement, !index.equals(zr), transferSize);
 182         assert allowOverwrite || !zr.equals(additionalReg) || plan.workPlan == NO_WORK;
 183         assert !plan.needsScratch || !zr.equals(additionalReg);
 184         int log2Scale = NumUtil.log2Ceil(transferSize);
 185         long scaledDisplacement = displacement >> log2Scale;
 186         Register newIndex = index;
 187         Register newBase = base;
 188         int immediate;
 189         switch (plan.workPlan) {
 190             case NO_WORK:
 191                 if (plan.addressingMode == IMMEDIATE_SCALED) {
 192                     immediate = (int) scaledDisplacement;
 193                 } else {
 194                     immediate = (int) displacement;
 195                 }
 196                 break;
 197             case ADD_TO_INDEX:
 198                 newIndex = allowOverwrite ? index : additionalReg;
 199                 assert !newIndex.equals(sp) && !newIndex.equals(zr);
 200                 if (plan.needsScratch) {
 201                     mov(additionalReg, scaledDisplacement);
 202                     add(signExtendIndex ? 32 : 64, newIndex, index, additionalReg);
 203                 } else {
 204                     add(signExtendIndex ? 32 : 64, newIndex, index, (int) scaledDisplacement);
 205                 }
 206                 immediate = 0;
 207                 break;
 208             case ADD_TO_BASE:
 209                 newBase = allowOverwrite ? base : additionalReg;
 210                 assert !newBase.equals(sp) && !newBase.equals(zr);
 211                 if (plan.needsScratch) {
 212                     mov(additionalReg, displacement);
 213                     add(64, newBase, base, additionalReg);
 214                 } else {
 215                     add(64, newBase, base, (int) displacement);
 216                 }
 217                 immediate = 0;
 218                 break;
 219             default:
 220                 throw GraalError.shouldNotReachHere();
 221         }
 222         AArch64Address.AddressingMode addressingMode = plan.addressingMode;
 223         ExtendType extendType = null;
 224         if (addressingMode == REGISTER_OFFSET) {
 225             if (newIndex.equals(zr)) {
 226                 addressingMode = BASE_REGISTER_ONLY;
 227             } else if (signExtendIndex) {
 228                 addressingMode = EXTENDED_REGISTER_OFFSET;
 229                 extendType = ExtendType.SXTW;
 230             }
 231         }
 232         return AArch64Address.createAddress(addressingMode, newBase, newIndex, immediate, transferSize != 0, extendType);
 233     }
 234 
    /**
     * Returns an AArch64Address pointing to {@code base + displacement}. Specifies the memory
     * transfer size to allow some optimizations when building the address. Delegates to the
     * general {@code makeAddress} with the zero register as index and no sign extension.
     *
     * @param base general purpose register. May not be null or the zero register.
     * @param displacement arbitrary displacement added to base.
     * @param additionalReg additional register used either as a scratch register or as part of the
     *            final address, depending on whether allowOverwrite is true or not. May not be
     *            null, zero register or stackpointer.
     * @param transferSize the memory transfer size in bytes.
     * @param allowOverwrite if true allows to change value of base or index register to generate
     *            address.
     * @return AArch64Address pointing to memory at {@code base + displacement}.
     */
    public AArch64Address makeAddress(Register base, long displacement, Register additionalReg, int transferSize, boolean allowOverwrite) {
        assert additionalReg.getRegisterCategory().equals(CPU);
        // Note the argument order: additionalReg comes before transferSize here but after it in
        // the general overload.
        return makeAddress(base, displacement, zr, /* sign-extend */false, transferSize, additionalReg, allowOverwrite);
    }
 253 
    /**
     * Returns an AArch64Address pointing to {@code base + displacement}. Fails if address cannot be
     * represented without overwriting base register or using a scratch register.
     *
     * Delegates to the general {@code makeAddress} with the zero register as both index and
     * additional register and with overwriting disallowed; that overload asserts that the
     * resulting plan requires no work, so the failure is only detected with assertions enabled.
     *
     * @param base general purpose register. May not be null or the zero register.
     * @param displacement arbitrary displacement added to base.
     * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
     *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
     * @return AArch64Address pointing to memory at {@code base + displacement}.
     */
    public AArch64Address makeAddress(Register base, long displacement, int transferSize) {
        return makeAddress(base, displacement, zr, /* signExtend */false, transferSize, zr, /* allowOverwrite */false);
    }
 267 
 268     /**
 269      * Loads memory address into register.
 270      *
 271      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 272      * @param address address whose value is loaded into dst. May not be null,
 273      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_POST_INDEXED
 274      *            POST_INDEXED} or
 275      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_PRE_INDEXED
 276      *            IMMEDIATE_PRE_INDEXED}
 277      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 278      *            the index register is scaled. Can be 1, 2, 4 or 8.
 279      */
 280     public void loadAddress(Register dst, AArch64Address address, int transferSize) {
 281         assert transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 282         assert dst.getRegisterCategory().equals(CPU);
 283         int shiftAmt = NumUtil.log2Ceil(transferSize);
 284         switch (address.getAddressingMode()) {
 285             case IMMEDIATE_SCALED:
 286                 int scaledImmediate = address.getImmediateRaw() << shiftAmt;
 287                 int lowerBits = scaledImmediate & NumUtil.getNbitNumberInt(12);
 288                 int higherBits = scaledImmediate & ~NumUtil.getNbitNumberInt(12);
 289                 boolean firstAdd = true;
 290                 if (lowerBits != 0) {
 291                     add(64, dst, address.getBase(), lowerBits);
 292                     firstAdd = false;
 293                 }
 294                 if (higherBits != 0) {
 295                     Register src = firstAdd ? address.getBase() : dst;
 296                     add(64, dst, src, higherBits);
 297                 }
 298                 break;
 299             case IMMEDIATE_UNSCALED:
 300                 int immediate = address.getImmediateRaw();
 301                 add(64, dst, address.getBase(), immediate);
 302                 break;
 303             case REGISTER_OFFSET:
 304                 add(64, dst, address.getBase(), address.getOffset(), ShiftType.LSL, address.isScaled() ? shiftAmt : 0);
 305                 break;
 306             case EXTENDED_REGISTER_OFFSET:
 307                 add(64, dst, address.getBase(), address.getOffset(), address.getExtendType(), address.isScaled() ? shiftAmt : 0);
 308                 break;
 309             case PC_LITERAL:
 310                 super.adr(dst, address.getImmediateRaw());
 311                 break;
 312             case BASE_REGISTER_ONLY:
 313                 movx(dst, address.getBase());
 314                 break;
 315             default:
 316                 throw GraalError.shouldNotReachHere();
 317         }
 318     }
 319 
    /**
     * 64-bit register-to-register move; shorthand for {@code mov(64, dst, src)}.
     *
     * @param dst destination register.
     * @param src source register.
     */
    public void movx(Register dst, Register src) {
        mov(64, dst, src);
    }
 323 
 324     public void mov(int size, Register dst, Register src) {
 325         if (dst.equals(sp) || src.equals(sp)) {
 326             add(size, dst, src, 0);
 327         } else {
 328             or(size, dst, zr, src);
 329         }
 330     }
 331 
 332     /**
 333      * Generates a 64-bit immediate move code sequence.
 334      *
 335      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 336      * @param imm
 337      */
 338     private void mov64(Register dst, long imm) {
 339         // We have to move all non zero parts of the immediate in 16-bit chunks
 340         boolean firstMove = true;
 341         for (int offset = 0; offset < 64; offset += 16) {
 342             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 343             if (chunk == 0) {
 344                 continue;
 345             }
 346             if (firstMove) {
 347                 movz(64, dst, chunk, offset);
 348                 firstMove = false;
 349             } else {
 350                 movk(64, dst, chunk, offset);
 351             }
 352         }
 353         assert !firstMove;
 354     }
 355 
    /**
     * Loads immediate into register, choosing between a move from the zero register, a single
     * or with a logical immediate, a 32-bit load followed by a sign extension, and the generic
     * 16-bit chunk sequence of {@code mov64}.
     *
     * @param dst general purpose register. May not be null, zero-register or stackpointer.
     * @param imm immediate loaded into register.
     */
    public void mov(Register dst, long imm) {
        assert dst.getRegisterCategory().equals(CPU);
        if (imm == 0L) {
            movx(dst, zr);
        } else if (LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
            or(64, dst, zr, imm);
        } else if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
            // If the higher 32 bits are 1s and the sign bit of the lower 32 bits is set *and* we
            // can represent the lower 32 bits as a logical immediate, we can create the lower 32
            // bits and then sign extend them. This allows us to cover immediates like ~1L with 2
            // instructions.
            // NOTE(review): mov(Register, int) zero-extends and re-enters this method, which then
            // tests the value as a 64-bit logical immediate. If the zero-extended value is not
            // representable in that form, the nested move presumably falls back to mov64 and this
            // path emits more than 2 instructions - confirm against LogicalImmediateTable.
            mov(dst, (int) imm);
            sxt(64, 32, dst, dst);
        } else {
            mov64(dst, imm);
        }
    }
 379 
    /**
     * Loads immediate into register. The immediate is zero-extended to 64 bits before being
     * handed to {@code mov(Register, long)}, so a negative {@code imm} leaves the upper 32 bits
     * of the value being loaded clear.
     *
     * @param dst general purpose register. May not be null, zero-register or stackpointer.
     * @param imm immediate loaded into register.
     */
    public void mov(Register dst, int imm) {
        mov(dst, imm & 0xFFFF_FFFFL);
    }
 389 
 390     /**
 391      * Generates a 48-bit immediate move code sequence. The immediate may later be updated by
 392      * HotSpot.
 393      *
 394      * In AArch64 mode the virtual address space is 48-bits in size, so we only need three
 395      * instructions to create a patchable instruction sequence that can reach anywhere.
 396      *
 397      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 398      * @param imm
 399      */
 400     public void movNativeAddress(Register dst, long imm) {
 401         assert (imm & 0xFFFF_0000_0000_0000L) == 0;
 402         // We have to move all non zero parts of the immediate in 16-bit chunks
 403         boolean firstMove = true;
 404         for (int offset = 0; offset < 48; offset += 16) {
 405             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 406             if (firstMove) {
 407                 movz(64, dst, chunk, offset);
 408                 firstMove = false;
 409             } else {
 410                 movk(64, dst, chunk, offset);
 411             }
 412         }
 413         assert !firstMove;
 414     }
 415 
 416     /**
 417      * Generates a 32-bit immediate move code sequence. The immediate may later be updated by
 418      * HotSpot.
 419      *
 420      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 421      * @param imm
 422      */
 423     public void movNarrowAddress(Register dst, long imm) {
 424         assert (imm & 0xFFFF_FFFF_0000_0000L) == 0;
 425         movz(64, dst, (int) (imm >>> 16), 16);
 426         movk(64, dst, (int) (imm & 0xffff), 0);
 427     }
 428 
 429     /**
 430      * @return Number of instructions necessary to load immediate into register.
 431      */
 432     public static int nrInstructionsToMoveImmediate(long imm) {
 433         if (imm == 0L || LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 434             return 1;
 435         }
 436         if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
 437             // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can
 438             // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and
 439             // then sign extend
 440             // them. This allows us to cover immediates like ~1L with 2 instructions.
 441             return 2;
 442         }
 443         int nrInstructions = 0;
 444         for (int offset = 0; offset < 64; offset += 16) {
 445             int part = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 446             if (part != 0) {
 447                 nrInstructions++;
 448             }
 449         }
 450         return nrInstructions;
 451     }
 452 
 453     /**
 454      * Loads a srcSize value from address into rt sign-extending it if necessary.
 455      *
 456      * @param targetSize size of target register in bits. Must be 32 or 64.
 457      * @param srcSize size of memory read in bits. Must be 8, 16 or 32 and smaller or equal to
 458      *            targetSize.
 459      * @param rt general purpose register. May not be null or stackpointer.
 460      * @param address all addressing modes allowed. May not be null.
 461      */
 462     @Override
 463     public void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
 464         assert targetSize == 32 || targetSize == 64;
 465         assert srcSize <= targetSize;
 466         if (targetSize == srcSize) {
 467             super.ldr(srcSize, rt, address);
 468         } else {
 469             super.ldrs(targetSize, srcSize, rt, address);
 470         }
 471     }
 472 
    /**
     * Loads a srcSize value from address into rt zero-extending it if necessary. This override
     * forwards unchanged to the superclass implementation.
     *
     * @param srcSize size of memory read in bits.
     * @param rt general purpose register. May not be null or stackpointer.
     * @param address all addressing modes allowed. May not be null.
     */
    @Override
    public void ldr(int srcSize, Register rt, AArch64Address address) {
        super.ldr(srcSize, rt, address);
    }
 485 
    /**
     * Conditional move. result = trueValue if cond else falseValue. Emitted through the
     * superclass {@code csel}.
     *
     * @param size register size. Has to be 32 or 64.
     * @param result general purpose register. May not be null or the stackpointer.
     * @param trueValue general purpose register. May not be null or the stackpointer.
     * @param falseValue general purpose register. May not be null or the stackpointer.
     * @param cond any condition flag. May not be null.
     */
    public void cmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag cond) {
        super.csel(size, result, trueValue, falseValue, cond);
    }
 498 
    /**
     * Conditional set. dst = 1 if condition else 0.
     *
     * @param dst general purpose register. May not be null or stackpointer.
     * @param condition any condition. May not be null.
     */
    public void cset(Register dst, ConditionFlag condition) {
        // Emitted as a 32-bit csinc on the zero register with the negated condition; this
        // presumably relies on csinc producing zr + 1 when its condition does not hold.
        super.csinc(32, dst, zr, zr, condition.negate());
    }
 508 
 509     /**
 510      * dst = src1 + src2.
 511      *
 512      * @param size register size. Has to be 32 or 64.
 513      * @param dst general purpose register. May not be null.
 514      * @param src1 general purpose register. May not be null.
 515      * @param src2 general purpose register. May not be null or stackpointer.
 516      */
 517     public void add(int size, Register dst, Register src1, Register src2) {
 518         if (dst.equals(sp) || src1.equals(sp)) {
 519             super.add(size, dst, src1, src2, ExtendType.UXTX, 0);
 520         } else {
 521             super.add(size, dst, src1, src2, ShiftType.LSL, 0);
 522         }
 523     }
 524 
 525     /**
 526      * dst = src1 + src2 and sets condition flags.
 527      *
 528      * @param size register size. Has to be 32 or 64.
 529      * @param dst general purpose register. May not be null.
 530      * @param src1 general purpose register. May not be null.
 531      * @param src2 general purpose register. May not be null or stackpointer.
 532      */
 533     public void adds(int size, Register dst, Register src1, Register src2) {
 534         if (dst.equals(sp) || src1.equals(sp)) {
 535             super.adds(size, dst, src1, src2, ExtendType.UXTX, 0);
 536         } else {
 537             super.adds(size, dst, src1, src2, ShiftType.LSL, 0);
 538         }
 539     }
 540 
 541     /**
 542      * dst = src1 - src2 and sets condition flags.
 543      *
 544      * @param size register size. Has to be 32 or 64.
 545      * @param dst general purpose register. May not be null.
 546      * @param src1 general purpose register. May not be null.
 547      * @param src2 general purpose register. May not be null or stackpointer.
 548      */
 549     public void subs(int size, Register dst, Register src1, Register src2) {
 550         if (dst.equals(sp) || src1.equals(sp)) {
 551             super.subs(size, dst, src1, src2, ExtendType.UXTX, 0);
 552         } else {
 553             super.subs(size, dst, src1, src2, ShiftType.LSL, 0);
 554         }
 555     }
 556 
 557     /**
 558      * dst = src1 - src2.
 559      *
 560      * @param size register size. Has to be 32 or 64.
 561      * @param dst general purpose register. May not be null.
 562      * @param src1 general purpose register. May not be null.
 563      * @param src2 general purpose register. May not be null or stackpointer.
 564      */
 565     public void sub(int size, Register dst, Register src1, Register src2) {
 566         if (dst.equals(sp) || src1.equals(sp)) {
 567             super.sub(size, dst, src1, src2, ExtendType.UXTX, 0);
 568         } else {
 569             super.sub(size, dst, src1, src2, ShiftType.LSL, 0);
 570         }
 571     }
 572 
 573     /**
 574      * dst = src1 + shiftType(src2, shiftAmt & (size - 1)).
 575      *
 576      * @param size register size. Has to be 32 or 64.
 577      * @param dst general purpose register. May not be null or stackpointer.
 578      * @param src1 general purpose register. May not be null or stackpointer.
 579      * @param src2 general purpose register. May not be null or stackpointer.
 580      * @param shiftType any type but ROR.
 581      * @param shiftAmt arbitrary shift amount.
 582      */
 583     @Override
 584     public void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 585         int shift = clampShiftAmt(size, shiftAmt);
 586         super.add(size, dst, src1, src2, shiftType, shift);
 587     }
 588 
 589     /**
 590      * dst = src1 + shiftType(src2, shiftAmt & (size-1)) and sets condition flags.
 591      *
 592      * @param size register size. Has to be 32 or 64.
 593      * @param dst general purpose register. May not be null or stackpointer.
 594      * @param src1 general purpose register. May not be null or stackpointer.
 595      * @param src2 general purpose register. May not be null or stackpointer.
 596      * @param shiftType any type but ROR.
 597      * @param shiftAmt arbitrary shift amount.
 598      */
 599     @Override
 600     public void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 601         int shift = clampShiftAmt(size, shiftAmt);
 602         super.sub(size, dst, src1, src2, shiftType, shift);
 603     }
 604 
    /**
     * dst = -src. Implemented as a subtraction of src from the zero register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src general purpose register. May not be null or stackpointer.
     */
    public void neg(int size, Register dst, Register src) {
        sub(size, dst, zr, src);
    }
 615 
 616     /**
 617      * dst = src + immediate.
 618      *
 619      * @param size register size. Has to be 32 or 64.
 620      * @param dst general purpose register. May not be null or zero-register.
 621      * @param src general purpose register. May not be null or zero-register.
 622      * @param immediate 32-bit signed int
 623      */
 624     @Override
 625     public void add(int size, Register dst, Register src, int immediate) {
 626         assert (!dst.equals(zr) && !src.equals(zr));
 627         if (immediate < 0) {
 628             sub(size, dst, src, -immediate);
 629         } else if (isAimm(immediate)) {
 630             if (!(dst.equals(src) && immediate == 0)) {
 631                 super.add(size, dst, src, immediate);
 632             }
 633         } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) {
 634             super.add(size, dst, src, immediate & -(1 << 12));
 635             super.add(size, dst, dst, immediate & ((1 << 12) - 1));
 636         } else {
 637             assert !dst.equals(src);
 638             mov(dst, immediate);
 639             add(size, src, dst, dst);
 640         }
 641     }
 642 
 643     /**
 644      * dst = src + aimm and sets condition flags.
 645      *
 646      * @param size register size. Has to be 32 or 64.
 647      * @param dst general purpose register. May not be null or stackpointer.
 648      * @param src general purpose register. May not be null or zero-register.
 649      * @param immediate arithmetic immediate.
 650      */
 651     @Override
 652     public void adds(int size, Register dst, Register src, int immediate) {
 653         assert (!dst.equals(sp) && !src.equals(zr));
 654         if (immediate < 0) {
 655             subs(size, dst, src, -immediate);
 656         } else if (!(dst.equals(src) && immediate == 0)) {
 657             super.adds(size, dst, src, immediate);
 658         }
 659     }
 660 
 661     /**
 662      * dst = src - immediate.
 663      *
 664      * @param size register size. Has to be 32 or 64.
 665      * @param dst general purpose register. May not be null or zero-register.
 666      * @param src general purpose register. May not be null or zero-register.
 667      * @param immediate 32-bit signed int
 668      */
 669     @Override
 670     public void sub(int size, Register dst, Register src, int immediate) {
 671         assert (!dst.equals(zr) && !src.equals(zr));
 672         if (immediate < 0) {
 673             add(size, dst, src, -immediate);
 674         } else if (isAimm(immediate)) {
 675             if (!(dst.equals(src) && immediate == 0)) {
 676                 super.sub(size, dst, src, immediate);
 677             }
 678         } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) {
 679             super.sub(size, dst, src, immediate & -(1 << 12));
 680             super.sub(size, dst, dst, immediate & ((1 << 12) - 1));
 681         } else {
 682             assert !dst.equals(src);
 683             mov(dst, immediate);
 684             sub(size, src, dst, dst);
 685         }
 686     }
 687 
 688     /**
 689      * dst = src - aimm and sets condition flags.
 690      *
 691      * @param size register size. Has to be 32 or 64.
 692      * @param dst general purpose register. May not be null or stackpointer.
 693      * @param src general purpose register. May not be null or zero-register.
 694      * @param immediate arithmetic immediate.
 695      */
 696     @Override
 697     public void subs(int size, Register dst, Register src, int immediate) {
 698         assert (!dst.equals(sp) && !src.equals(zr));
 699         if (immediate < 0) {
 700             adds(size, dst, src, -immediate);
 701         } else if (!dst.equals(src) || immediate != 0) {
 702             super.subs(size, dst, src, immediate);
 703         }
 704     }
 705 
 706     /**
 707      * dst = src1 * src2.
 708      *
 709      * @param size register size. Has to be 32 or 64.
 710      * @param dst general purpose register. May not be null or the stackpointer.
 711      * @param src1 general purpose register. May not be null or the stackpointer.
 712      * @param src2 general purpose register. May not be null or the stackpointer.
 713      */
 714     public void mul(int size, Register dst, Register src1, Register src2) {
 715         super.madd(size, dst, src1, src2, zr);
 716     }
 717 
 718     /**
 719      * unsigned multiply high. dst = (src1 * src2) >> size
 720      *
 721      * @param size register size. Has to be 32 or 64.
 722      * @param dst general purpose register. May not be null or the stackpointer.
 723      * @param src1 general purpose register. May not be null or the stackpointer.
 724      * @param src2 general purpose register. May not be null or the stackpointer.
 725      */
 726     public void umulh(int size, Register dst, Register src1, Register src2) {
 727         assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp));
 728         assert size == 32 || size == 64;
 729         if (size == 64) {
 730             super.umulh(dst, src1, src2);
 731         } else {
 732             // xDst = wSrc1 * wSrc2
 733             super.umaddl(dst, src1, src2, zr);
 734             // xDst = xDst >> 32
 735             lshr(64, dst, dst, 32);
 736         }
 737     }
 738 
 739     /**
 740      * signed multiply high. dst = (src1 * src2) >> size
 741      *
 742      * @param size register size. Has to be 32 or 64.
 743      * @param dst general purpose register. May not be null or the stackpointer.
 744      * @param src1 general purpose register. May not be null or the stackpointer.
 745      * @param src2 general purpose register. May not be null or the stackpointer.
 746      */
 747     public void smulh(int size, Register dst, Register src1, Register src2) {
 748         assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp));
 749         assert size == 32 || size == 64;
 750         if (size == 64) {
 751             super.smulh(dst, src1, src2);
 752         } else {
 753             // xDst = wSrc1 * wSrc2
 754             super.smaddl(dst, src1, src2, zr);
 755             // xDst = xDst >> 32
 756             lshr(64, dst, dst, 32);
 757         }
 758     }
 759 
 760     /**
 761      * dst = src1 % src2. Signed.
 762      *
 763      * @param size register size. Has to be 32 or 64.
 764      * @param dst general purpose register. May not be null or the stackpointer.
 765      * @param n numerator. General purpose register. May not be null or the stackpointer.
 766      * @param d denominator. General purpose register. Divisor May not be null or the stackpointer.
 767      */
 768     public void rem(int size, Register dst, Register n, Register d) {
 769         assert (!dst.equals(sp) && !n.equals(sp) && !d.equals(sp));
 770         // There is no irem or similar instruction. Instead we use the relation:
 771         // n % d = n - Floor(n / d) * d if nd >= 0
 772         // n % d = n - Ceil(n / d) * d else
 773         // Which is equivalent to n - TruncatingDivision(n, d) * d
 774         super.sdiv(size, dst, n, d);
 775         super.msub(size, dst, dst, d, n);
 776     }
 777 
 778     /**
 779      * dst = src1 % src2. Unsigned.
 780      *
 781      * @param size register size. Has to be 32 or 64.
 782      * @param dst general purpose register. May not be null or the stackpointer.
 783      * @param n numerator. General purpose register. May not be null or the stackpointer.
 784      * @param d denominator. General purpose register. Divisor May not be null or the stackpointer.
 785      */
 786     public void urem(int size, Register dst, Register n, Register d) {
 787         // There is no irem or similar instruction. Instead we use the relation:
 788         // n % d = n - Floor(n / d) * d
 789         // Which is equivalent to n - TruncatingDivision(n, d) * d
 790         super.udiv(size, dst, n, d);
 791         super.msub(size, dst, dst, d, n);
 792     }
 793 
 794     /**
 795      * Add/subtract instruction encoding supports 12-bit immediate values.
 796      *
 797      * @param imm immediate value to be tested.
 798      * @return true if immediate can be used directly for arithmetic instructions (add/sub), false
 799      *         otherwise.
 800      */
 801     public static boolean isArithmeticImmediate(long imm) {
 802         // If we have a negative immediate we just use the opposite operator. I.e.: x - (-5) == x +
 803         // 5.
 804         return NumUtil.isInt(Math.abs(imm)) && isAimm((int) Math.abs(imm));
 805     }
 806 
 807     /**
 808      * Compare instructions are add/subtract instructions and so support 12-bit immediate values.
 809      *
 810      * @param imm immediate value to be tested.
 811      * @return true if immediate can be used directly with comparison instructions, false otherwise.
 812      */
 813     public static boolean isComparisonImmediate(long imm) {
 814         return isArithmeticImmediate(imm);
 815     }
 816 
 817     /**
 818      * Move wide immediate instruction encoding supports 16-bit immediate values which can be
 819      * optionally-shifted by multiples of 16 (i.e. 0, 16, 32, 48).
 820      *
 821      * @return true if immediate can be moved directly into a register, false otherwise.
 822      */
 823     public static boolean isMovableImmediate(long imm) {
 824         // // Positions of first, respectively last set bit.
 825         // int start = Long.numberOfTrailingZeros(imm);
 826         // int end = 64 - Long.numberOfLeadingZeros(imm);
 827         // int length = end - start;
 828         // if (length > 16) {
 829         // return false;
 830         // }
 831         // // We can shift the necessary part of the immediate (i.e. everything between the first
 832         // and
 833         // // last set bit) by as much as 16 - length around to arrive at a valid shift amount
 834         // int tolerance = 16 - length;
 835         // int prevMultiple = NumUtil.roundDown(start, 16);
 836         // int nextMultiple = NumUtil.roundUp(start, 16);
 837         // return start - prevMultiple <= tolerance || nextMultiple - start <= tolerance;
 838         /*
 839          * This is a bit optimistic because the constant could also be for an arithmetic instruction
 840          * which only supports 12-bits. That case needs to be handled in the backend.
 841          */
 842         return NumUtil.isInt(Math.abs(imm)) && NumUtil.isUnsignedNbit(16, (int) Math.abs(imm));
 843     }
 844 
 845     /**
 846      * dst = src << (shiftAmt & (size - 1)).
 847      *
 848      * @param size register size. Has to be 32 or 64.
 849      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 850      * @param src general purpose register. May not be null, stackpointer or zero-register.
 851      * @param shiftAmt amount by which src is shifted.
 852      */
 853     public void shl(int size, Register dst, Register src, long shiftAmt) {
 854         int shift = clampShiftAmt(size, shiftAmt);
 855         super.ubfm(size, dst, src, (size - shift) & (size - 1), size - 1 - shift);
 856     }
 857 
 858     /**
 859      * dst = src1 << (src2 & (size - 1)).
 860      *
 861      * @param size register size. Has to be 32 or 64.
 862      * @param dst general purpose register. May not be null or stackpointer.
 863      * @param src general purpose register. May not be null or stackpointer.
 864      * @param shift general purpose register. May not be null or stackpointer.
 865      */
 866     public void shl(int size, Register dst, Register src, Register shift) {
 867         super.lsl(size, dst, src, shift);
 868     }
 869 
 870     /**
 871      * dst = src >>> (shiftAmt & (size - 1)).
 872      *
 873      * @param size register size. Has to be 32 or 64.
 874      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 875      * @param src general purpose register. May not be null, stackpointer or zero-register.
 876      * @param shiftAmt amount by which src is shifted.
 877      */
 878     public void lshr(int size, Register dst, Register src, long shiftAmt) {
 879         int shift = clampShiftAmt(size, shiftAmt);
 880         super.ubfm(size, dst, src, shift, size - 1);
 881     }
 882 
 883     /**
 884      * dst = src1 >>> (src2 & (size - 1)).
 885      *
 886      * @param size register size. Has to be 32 or 64.
 887      * @param dst general purpose register. May not be null or stackpointer.
 888      * @param src general purpose register. May not be null or stackpointer.
 889      * @param shift general purpose register. May not be null or stackpointer.
 890      */
 891     public void lshr(int size, Register dst, Register src, Register shift) {
 892         super.lsr(size, dst, src, shift);
 893     }
 894 
 895     /**
 896      * dst = src >> (shiftAmt & log2(size)).
 897      *
 898      * @param size register size. Has to be 32 or 64.
 899      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 900      * @param src general purpose register. May not be null, stackpointer or zero-register.
 901      * @param shiftAmt amount by which src is shifted.
 902      */
 903     public void ashr(int size, Register dst, Register src, long shiftAmt) {
 904         int shift = clampShiftAmt(size, shiftAmt);
 905         super.sbfm(size, dst, src, shift, size - 1);
 906     }
 907 
 908     /**
 909      * dst = src1 >> (src2 & log2(size)).
 910      *
 911      * @param size register size. Has to be 32 or 64.
 912      * @param dst general purpose register. May not be null or stackpointer.
 913      * @param src general purpose register. May not be null or stackpointer.
 914      * @param shift general purpose register. May not be null or stackpointer.
 915      */
 916     public void ashr(int size, Register dst, Register src, Register shift) {
 917         super.asr(size, dst, src, shift);
 918     }
 919 
 920     /**
 921      * Clamps shiftAmt into range 0 <= shiftamt < size according to JLS.
 922      *
 923      * @param size size of operation.
 924      * @param shiftAmt arbitrary shift amount.
 925      * @return value between 0 and size - 1 inclusive that is equivalent to shiftAmt according to
 926      *         JLS.
 927      */
 928     private static int clampShiftAmt(int size, long shiftAmt) {
 929         return (int) (shiftAmt & (size - 1));
 930     }
 931 
 932     /**
 933      * dst = src1 & src2.
 934      *
 935      * @param size register size. Has to be 32 or 64.
 936      * @param dst general purpose register. May not be null or stackpointer.
 937      * @param src1 general purpose register. May not be null or stackpointer.
 938      * @param src2 general purpose register. May not be null or stackpointer.
 939      */
 940     public void and(int size, Register dst, Register src1, Register src2) {
 941         super.and(size, dst, src1, src2, ShiftType.LSL, 0);
 942     }
 943 
 944     /**
 945      * dst = src1 ^ src2.
 946      *
 947      * @param size register size. Has to be 32 or 64.
 948      * @param dst general purpose register. May not be null or stackpointer.
 949      * @param src1 general purpose register. May not be null or stackpointer.
 950      * @param src2 general purpose register. May not be null or stackpointer.
 951      */
 952     public void eor(int size, Register dst, Register src1, Register src2) {
 953         super.eor(size, dst, src1, src2, ShiftType.LSL, 0);
 954     }
 955 
 956     /**
 957      * dst = src1 | src2.
 958      *
 959      * @param size register size. Has to be 32 or 64.
 960      * @param dst general purpose register. May not be null or stackpointer.
 961      * @param src1 general purpose register. May not be null or stackpointer.
 962      * @param src2 general purpose register. May not be null or stackpointer.
 963      */
 964     public void or(int size, Register dst, Register src1, Register src2) {
 965         super.orr(size, dst, src1, src2, ShiftType.LSL, 0);
 966     }
 967 
 968     /**
 969      * dst = src | bimm.
 970      *
 971      * @param size register size. Has to be 32 or 64.
 972      * @param dst general purpose register. May not be null or zero-register.
 973      * @param src general purpose register. May not be null or stack-pointer.
 974      * @param bimm logical immediate. See {@link AArch64Assembler.LogicalImmediateTable} for exact
 975      *            definition.
 976      */
 977     public void or(int size, Register dst, Register src, long bimm) {
 978         super.orr(size, dst, src, bimm);
 979     }
 980 
 981     /**
 982      * dst = ~src.
 983      *
 984      * @param size register size. Has to be 32 or 64.
 985      * @param dst general purpose register. May not be null or stackpointer.
 986      * @param src general purpose register. May not be null or stackpointer.
 987      */
 988     public void not(int size, Register dst, Register src) {
 989         super.orn(size, dst, zr, src, ShiftType.LSL, 0);
 990     }
 991 
 992     /**
 993      * Sign-extend value from src into dst.
 994      *
 995      * @param destSize destination register size. Must be 32 or 64.
 996      * @param srcSize source register size. Must be smaller than destSize.
 997      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 998      * @param src general purpose register. May not be null, stackpointer or zero-register.
 999      */
1000     public void sxt(int destSize, int srcSize, Register dst, Register src) {
1001         assert (srcSize < destSize && srcSize > 0);
1002         super.sbfm(destSize, dst, src, 0, srcSize - 1);
1003     }
1004 
1005     /**
1006      * dst = src if condition else -src.
1007      *
1008      * @param size register size. Must be 32 or 64.
1009      * @param dst general purpose register. May not be null or the stackpointer.
1010      * @param src general purpose register. May not be null or the stackpointer.
1011      * @param condition any condition except AV or NV. May not be null.
1012      */
1013     public void csneg(int size, Register dst, Register src, ConditionFlag condition) {
1014         super.csneg(size, dst, src, src, condition.negate());
1015     }
1016 
1017     /**
1018      * @return True if the immediate can be used directly for logical 64-bit instructions.
1019      */
1020     public static boolean isLogicalImmediate(long imm) {
1021         return LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO;
1022     }
1023 
1024     /**
1025      * @return True if the immediate can be used directly for logical 32-bit instructions.
1026      */
1027     public static boolean isLogicalImmediate(int imm) {
1028         return LogicalImmediateTable.isRepresentable(imm) == LogicalImmediateTable.Representable.YES;
1029     }
1030 
1031     /* Float instructions */
1032 
1033     /**
1034      * Moves integer to float, float to integer, or float to float. Does not support integer to
1035      * integer moves.
1036      *
1037      * @param size register size. Has to be 32 or 64.
1038      * @param dst Either floating-point or general-purpose register. If general-purpose register may
1039      *            not be stackpointer or zero register. Cannot be null in any case.
1040      * @param src Either floating-point or general-purpose register. If general-purpose register may
1041      *            not be stackpointer. Cannot be null in any case.
1042      */
1043     @Override
1044     public void fmov(int size, Register dst, Register src) {
1045         assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU)) : "src and dst cannot both be integer registers.";
1046         if (dst.getRegisterCategory().equals(CPU)) {
1047             super.fmovFpu2Cpu(size, dst, src);
1048         } else if (src.getRegisterCategory().equals(CPU)) {
1049             super.fmovCpu2Fpu(size, dst, src);
1050         } else {
1051             super.fmov(size, dst, src);
1052         }
1053     }
1054 
1055     /**
1056      *
1057      * @param size register size. Has to be 32 or 64.
1058      * @param dst floating point register. May not be null.
1059      * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be
1060      *            loaded, i.e. (float) imm == imm must be true. In all cases
1061      *            {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true
1062      *            depending on size.
1063      */
1064     @Override
1065     public void fmov(int size, Register dst, double imm) {
1066         if (imm == 0.0) {
1067             assert Double.doubleToRawLongBits(imm) == 0L : "-0.0 is no valid immediate.";
1068             super.fmovCpu2Fpu(size, dst, zr);
1069         } else {
1070             super.fmov(size, dst, imm);
1071         }
1072     }
1073 
1074     /**
1075      *
1076      * @return true if immediate can be loaded directly into floating-point register, false
1077      *         otherwise.
1078      */
1079     public static boolean isDoubleImmediate(double imm) {
1080         return Double.doubleToRawLongBits(imm) == 0L || AArch64Assembler.isDoubleImmediate(imm);
1081     }
1082 
1083     /**
1084      *
1085      * @return true if immediate can be loaded directly into floating-point register, false
1086      *         otherwise.
1087      */
1088     public static boolean isFloatImmediate(float imm) {
1089         return Float.floatToRawIntBits(imm) == 0 || AArch64Assembler.isFloatImmediate(imm);
1090     }
1091 
1092     /**
1093      * Conditional move. dst = src1 if condition else src2.
1094      *
1095      * @param size register size.
1096      * @param result floating point register. May not be null.
1097      * @param trueValue floating point register. May not be null.
1098      * @param falseValue floating point register. May not be null.
1099      * @param condition every condition allowed. May not be null.
1100      */
1101     public void fcmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag condition) {
1102         super.fcsel(size, result, trueValue, falseValue, condition);
1103     }
1104 
1105     /**
1106      * dst = src1 % src2.
1107      *
1108      * @param size register size. Has to be 32 or 64.
1109      * @param dst floating-point register. May not be null.
1110      * @param n numerator. Floating-point register. May not be null.
1111      * @param d denominator. Floating-point register. May not be null.
1112      */
1113     public void frem(int size, Register dst, Register n, Register d) {
1114         // There is no frem instruction, instead we compute the remainder using the relation:
1115         // rem = n - Truncating(n / d) * d
1116         super.fdiv(size, dst, n, d);
1117         super.frintz(size, dst, dst);
1118         super.fmsub(size, dst, dst, d, n);
1119     }
1120 
1121     /* Branches */
1122 
1123     /**
1124      * Compares x and y and sets condition flags.
1125      *
1126      * @param size register size. Has to be 32 or 64.
1127      * @param x general purpose register. May not be null or stackpointer.
1128      * @param y general purpose register. May not be null or stackpointer.
1129      */
1130     public void cmp(int size, Register x, Register y) {
1131         assert size == 32 || size == 64;
1132         super.subs(size, zr, x, y, ShiftType.LSL, 0);
1133     }
1134 
1135     /**
1136      * Compares x to y and sets condition flags.
1137      *
1138      * @param size register size. Has to be 32 or 64.
1139      * @param x general purpose register. May not be null or stackpointer.
1140      * @param y comparison immediate, {@link #isComparisonImmediate(long)} has to be true for it.
1141      */
1142     public void cmp(int size, Register x, int y) {
1143         assert size == 32 || size == 64;
1144         if (y < 0) {
1145             super.adds(size, zr, x, -y);
1146         } else {
1147             super.subs(size, zr, x, y);
1148         }
1149     }
1150 
1151     /**
1152      * Sets condition flags according to result of x & y.
1153      *
1154      * @param size register size. Has to be 32 or 64.
1155      * @param dst general purpose register. May not be null or stack-pointer.
1156      * @param x general purpose register. May not be null or stackpointer.
1157      * @param y general purpose register. May not be null or stackpointer.
1158      */
1159     public void ands(int size, Register dst, Register x, Register y) {
1160         super.ands(size, dst, x, y, ShiftType.LSL, 0);
1161     }
1162 
1163     /**
1164      * Sets overflow flag according to result of x * y.
1165      *
1166      * @param size register size. Has to be 32 or 64.
1167      * @param dst general purpose register. May not be null or stack-pointer.
1168      * @param x general purpose register. May not be null or stackpointer.
1169      * @param y general purpose register. May not be null or stackpointer.
1170      */
1171     public void mulvs(int size, Register dst, Register x, Register y) {
1172         try (ScratchRegister sc1 = getScratchRegister();
1173                         ScratchRegister sc2 = getScratchRegister()) {
1174             switch (size) {
1175                 case 64: {
1176                     // Be careful with registers: it's possible that x, y, and dst are the same
1177                     // register.
1178                     Register rscratch1 = sc1.getRegister();
1179                     Register rscratch2 = sc2.getRegister();
1180                     mul(64, rscratch1, x, y);     // Result bits 0..63
1181                     smulh(64, rscratch2, x, y);  // Result bits 64..127
1182                     // Top is pure sign ext
1183                     subs(64, zr, rscratch2, rscratch1, ShiftType.ASR, 63);
1184                     // Copy all 64 bits of the result into dst
1185                     mov(64, dst, rscratch1);
1186                     mov(rscratch1, 0x80000000);
1187                     // Develop 0 (EQ), or 0x80000000 (NE)
1188                     cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE);
1189                     cmp(32, rscratch1, 1);
1190                     // 0x80000000 - 1 => VS
1191                     break;
1192                 }
1193                 case 32: {
1194                     Register rscratch1 = sc1.getRegister();
1195                     smaddl(rscratch1, x, y, zr);
1196                     // Copy the low 32 bits of the result into dst
1197                     mov(32, dst, rscratch1);
1198                     subs(64, zr, rscratch1, rscratch1, ExtendType.SXTW, 0);
1199                     // NE => overflow
1200                     mov(rscratch1, 0x80000000);
1201                     // Develop 0 (EQ), or 0x80000000 (NE)
1202                     cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE);
1203                     cmp(32, rscratch1, 1);
1204                     // 0x80000000 - 1 => VS
1205                     break;
1206                 }
1207             }
1208         }
1209     }
1210 
1211     /**
1212      * When patching up Labels we have to know what kind of code to generate.
1213      */
1214     public enum PatchLabelKind {
1215         BRANCH_CONDITIONALLY(0x0),
1216         BRANCH_UNCONDITIONALLY(0x1),
1217         BRANCH_NONZERO(0x2),
1218         BRANCH_ZERO(0x3),
1219         JUMP_ADDRESS(0x4),
1220         ADR(0x5);
1221 
1222         /**
1223          * Offset by which additional information for branch conditionally, branch zero and branch
1224          * non zero has to be shifted.
1225          */
1226         public static final int INFORMATION_OFFSET = 5;
1227 
1228         public final int encoding;
1229 
1230         PatchLabelKind(int encoding) {
1231             this.encoding = encoding;
1232         }
1233 
1234         /**
1235          * @return PatchLabelKind with given encoding.
1236          */
1237         private static PatchLabelKind fromEncoding(int encoding) {
1238             return values()[encoding & NumUtil.getNbitNumberInt(INFORMATION_OFFSET)];
1239         }
1240 
1241     }
1242 
1243     public void adr(Register dst, Label label) {
1244         // TODO Handle case where offset is too large for a single jump instruction
1245         if (label.isBound()) {
1246             int offset = label.position() - position();
1247             super.adr(dst, offset);
1248         } else {
1249             label.addPatchAt(position());
1250             // Encode condition flag so that we know how to patch the instruction later
1251             emitInt(PatchLabelKind.ADR.encoding | dst.encoding << PatchLabelKind.INFORMATION_OFFSET);
1252         }
1253     }
1254 
1255     /**
1256      * Compare register and branch if non-zero.
1257      *
1258      * @param size Instruction size in bits. Should be either 32 or 64.
1259      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1260      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1261      */
1262     public void cbnz(int size, Register cmp, Label label) {
1263         // TODO Handle case where offset is too large for a single jump instruction
1264         if (label.isBound()) {
1265             int offset = label.position() - position();
1266             super.cbnz(size, cmp, offset);
1267         } else {
1268             label.addPatchAt(position());
1269             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1270             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1271             // Encode condition flag so that we know how to patch the instruction later
1272             emitInt(PatchLabelKind.BRANCH_NONZERO.encoding | regEncoding | sizeEncoding);
1273         }
1274     }
1275 
1276     /**
1277      * Compare register and branch if zero.
1278      *
1279      * @param size Instruction size in bits. Should be either 32 or 64.
1280      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1281      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1282      */
1283     public void cbz(int size, Register cmp, Label label) {
1284         // TODO Handle case where offset is too large for a single jump instruction
1285         if (label.isBound()) {
1286             int offset = label.position() - position();
1287             super.cbz(size, cmp, offset);
1288         } else {
1289             label.addPatchAt(position());
1290             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1291             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1292             // Encode condition flag so that we know how to patch the instruction later
1293             emitInt(PatchLabelKind.BRANCH_ZERO.encoding | regEncoding | sizeEncoding);
1294         }
1295     }
1296 
1297     /**
1298      * Branches to label if condition is true.
1299      *
1300      * @param condition any condition value allowed. Non null.
1301      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1302      */
1303     public void branchConditionally(ConditionFlag condition, Label label) {
1304         // TODO Handle case where offset is too large for a single jump instruction
1305         if (label.isBound()) {
1306             int offset = label.position() - position();
1307             super.b(condition, offset);
1308         } else {
1309             label.addPatchAt(position());
1310             // Encode condition flag so that we know how to patch the instruction later
1311             emitInt(PatchLabelKind.BRANCH_CONDITIONALLY.encoding | condition.encoding << PatchLabelKind.INFORMATION_OFFSET);
1312         }
1313     }
1314 
1315     /**
1316      * Branches if condition is true. Address of jump is patched up by HotSpot c++ code.
1317      *
1318      * @param condition any condition value allowed. Non null.
1319      */
1320     public void branchConditionally(ConditionFlag condition) {
1321         // Correct offset is fixed up by HotSpot later.
1322         super.b(condition, 0);
1323     }
1324 
1325     /**
1326      * Jumps to label.
1327      *
1328      * param label Can only handle signed 28-bit offsets. May be unbound. Non null.
1329      */
1330     @Override
1331     public void jmp(Label label) {
1332         // TODO Handle case where offset is too large for a single jump instruction
1333         if (label.isBound()) {
1334             int offset = label.position() - position();
1335             super.b(offset);
1336         } else {
1337             label.addPatchAt(position());
1338             emitInt(PatchLabelKind.BRANCH_UNCONDITIONALLY.encoding);
1339         }
1340     }
1341 
1342     /**
1343      * Jump to address in dest.
1344      *
1345      * @param dest General purpose register. May not be null, zero-register or stackpointer.
1346      */
1347     public void jmp(Register dest) {
1348         super.br(dest);
1349     }
1350 
1351     /**
1352      * Immediate jump instruction fixed up by HotSpot c++ code.
1353      */
1354     public void jmp() {
1355         // Offset has to be fixed up by c++ code.
1356         super.b(0);
1357     }
1358 
1359     /**
1360      *
1361      * @return true if immediate offset can be used in a single branch instruction.
1362      */
1363     public static boolean isBranchImmediateOffset(long imm) {
1364         return NumUtil.isSignedNbit(28, imm);
1365     }
1366 
1367     /* system instructions */
1368 
/**
 * Exception codes used when calling the hlt or brk instruction.
 */
public enum AArch64ExceptionCode {
    NO_SWITCH_TARGET(0x0),
    BREAKPOINT(0x1);

    /** Numeric value associated with this exception code. */
    public final int encoding;

    AArch64ExceptionCode(int code) {
        encoding = code;
    }
}
1382 
1383     /**
1384      * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
1385      * UNALLOCATED instruction.
1386      *
1387      * @param exceptionCode exception code specifying why halt was called. Non null.
1388      */
1389     public void hlt(AArch64ExceptionCode exceptionCode) {
1390         super.hlt(exceptionCode.encoding);
1391     }
1392 
1393     /**
1394      * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
1395      * exception level.
1396      *
1397      * @param exceptionCode exception code specifying why break was called. Non null.
1398      */
1399     public void brk(AArch64ExceptionCode exceptionCode) {
1400         super.brk(exceptionCode.encoding);
1401     }
1402 
/**
 * Not implemented by this assembler: every call ends in the error obtained from
 * {@code GraalError.unimplemented()}.
 */
public void pause() {
    throw GraalError.unimplemented();
}
1406 
1407     /**
1408      * Executes no-op instruction. No registers or flags are updated, except for PC.
1409      */
1410     public void nop() {
1411         super.hint(SystemHint.NOP);
1412     }
1413 
/**
 * Same as {@link #nop()}: emits a single no-op instruction and nothing else.
 */
@Override
public void ensureUniquePC() {
    nop();
}
1421 
1422     /**
1423      * Aligns PC.
1424      *
1425      * @param modulus Has to be positive multiple of 4.
1426      */
1427     @Override
1428     public void align(int modulus) {
1429         assert modulus > 0 && (modulus & 0x3) == 0 : "Modulus has to be a positive multiple of 4.";
1430         if (position() % modulus == 0) {
1431             return;
1432         }
1433         int offset = modulus - position() % modulus;
1434         for (int i = 0; i < offset; i += 4) {
1435             nop();
1436         }
1437     }
1438 
1439     /**
1440      * Patches jump targets when label gets bound.
1441      */
1442     @Override
1443     protected void patchJumpTarget(int branch, int jumpTarget) {
1444         int instruction = getInt(branch);
1445         int branchOffset = jumpTarget - branch;
1446         PatchLabelKind type = PatchLabelKind.fromEncoding(instruction);
1447         switch (type) {
1448             case BRANCH_CONDITIONALLY:
1449                 ConditionFlag cf = ConditionFlag.fromEncoding(instruction >>> PatchLabelKind.INFORMATION_OFFSET);
1450                 super.b(cf, branchOffset, branch);
1451                 break;
1452             case BRANCH_UNCONDITIONALLY:
1453                 super.b(branchOffset, branch);
1454                 break;
1455             case JUMP_ADDRESS:
1456                 int offset = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1457                 emitInt(jumpTarget - offset, branch);
1458                 break;
1459             case BRANCH_NONZERO:
1460             case BRANCH_ZERO: {
1461                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1462                 int sizeEncoding = information & 1;
1463                 int regEncoding = information >>> 1;
1464                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1465                 // 1 => 64; 0 => 32
1466                 int size = sizeEncoding * 32 + 32;
1467                 switch (type) {
1468                     case BRANCH_NONZERO:
1469                         super.cbnz(size, reg, branchOffset, branch);
1470                         break;
1471                     case BRANCH_ZERO:
1472                         super.cbz(size, reg, branchOffset, branch);
1473                         break;
1474                 }
1475                 break;
1476             }
1477             case ADR: {
1478                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1479                 int regEncoding = information;
1480                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1481                 super.adr(reg, branchOffset, branch);
1482                 break;
1483             }
1484             default:
1485                 throw GraalError.shouldNotReachHere();
1486         }
1487     }
1488 
1489     /**
1490      * Generates an address of the form {@code base + displacement}.
1491      *
1492      * Does not change base register to fulfill this requirement. Will fail if displacement cannot
1493      * be represented directly as address.
1494      *
1495      * @param base general purpose register. May not be null or the zero register.
1496      * @param displacement arbitrary displacement added to base.
1497      * @return AArch64Address referencing memory at {@code base + displacement}.
1498      */
1499     @Override
1500     public AArch64Address makeAddress(Register base, int displacement) {
1501         return makeAddress(base, displacement, zr, /* signExtend */false, /* transferSize */0, zr, /* allowOverwrite */false);
1502     }
1503 
1504     @Override
1505     public AbstractAddress getPlaceholder(int instructionStartPosition) {
1506         return AArch64Address.PLACEHOLDER;
1507     }
1508 
/**
 * Loads an address into Register d. The work is delegated to the address object itself, which
 * receives this assembler and the destination register.
 *
 * @param d general purpose register. May not be null.
 * @param a AArch64Address the address of an operand.
 */
public void lea(Register d, AArch64Address a) {
    a.lea(this, d);
}
1518 }