1 /*
   2  * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 
  26 package org.graalvm.compiler.asm.aarch64;
  27 
  28 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.BASE_REGISTER_ONLY;
  29 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.EXTENDED_REGISTER_OFFSET;
  30 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SCALED;
  31 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_UNSCALED;
  32 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.REGISTER_OFFSET;
  33 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_BASE;
  34 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_INDEX;
  35 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.NO_WORK;
  36 import static jdk.vm.ci.aarch64.AArch64.CPU;
  37 import static jdk.vm.ci.aarch64.AArch64.r8;
  38 import static jdk.vm.ci.aarch64.AArch64.r9;
  39 import static jdk.vm.ci.aarch64.AArch64.sp;
  40 import static jdk.vm.ci.aarch64.AArch64.zr;
  41 
  42 import org.graalvm.compiler.asm.Label;
  43 import org.graalvm.compiler.core.common.NumUtil;
  44 import org.graalvm.compiler.debug.GraalError;
  45 
  46 import jdk.vm.ci.aarch64.AArch64;
  47 import jdk.vm.ci.code.Register;
  48 import jdk.vm.ci.code.TargetDescription;
  49 
  50 public class AArch64MacroAssembler extends AArch64Assembler {
  51 
    // Pool of scratch-register handles (r8 and r9), handed out in array order by
    // getScratchRegister().
    private final ScratchRegister[] scratchRegister = new ScratchRegister[]{new ScratchRegister(r8), new ScratchRegister(r9)};

    // Points to the next free scratch register: index into scratchRegister of the handle that the
    // next getScratchRegister() call returns. Decremented again by ScratchRegister.close().
    private int nextFreeScratchRegister = 0;
  56 
    /**
     * Creates a macro assembler for the given target.
     *
     * @param target target description, passed on unchanged to the superclass constructor.
     */
    public AArch64MacroAssembler(TargetDescription target) {
        super(target);
    }
  60 
    /**
     * Handle for one of this assembler's scratch registers. Closing a handle decrements the
     * enclosing assembler's {@code nextFreeScratchRegister} counter, which makes the most recently
     * handed-out pool slot available again. Handles are presumably meant to be used in
     * try-with-resources blocks and released in reverse order of acquisition (NOTE(review):
     * confirm against callers).
     */
    public class ScratchRegister implements AutoCloseable {
        // The machine register represented by this handle.
        private final Register register;

        /**
         * @param register the register wrapped by this handle.
         */
        public ScratchRegister(Register register) {
            this.register = register;
        }

        /**
         * @return the register wrapped by this handle.
         */
        public Register getRegister() {
            return register;
        }

        /**
         * Gives a scratch register slot back to the enclosing assembler. With assertions enabled,
         * fails if more handles are closed than were handed out.
         */
        @Override
        public void close() {
            assert nextFreeScratchRegister > 0 : "Close called too often";
            // The counter is shared by all handles; it is decremented regardless of which handle
            // is being closed.
            nextFreeScratchRegister--;
        }
    }
  78 
  79     public ScratchRegister getScratchRegister() {
  80         return scratchRegister[nextFreeScratchRegister++];
  81     }
  82 
  83     /**
  84      * Specifies what actions have to be taken to turn an arbitrary address of the form
  85      * {@code base + displacement [+ index [<< scale]]} into a valid AArch64Address.
  86      */
  87     public static class AddressGenerationPlan {
  88         public final WorkPlan workPlan;
  89         public final AArch64Address.AddressingMode addressingMode;
  90         public final boolean needsScratch;
  91 
  92         public enum WorkPlan {
  93             /**
  94              * Can be used as-is without extra work.
  95              */
  96             NO_WORK,
  97             /**
  98              * Add scaled displacement to index register.
  99              */
 100             ADD_TO_INDEX,
 101             /**
 102              * Add unscaled displacement to base register.
 103              */
 104             ADD_TO_BASE,
 105         }
 106 
 107         /**
 108          * @param workPlan Work necessary to generate a valid address.
 109          * @param addressingMode Addressing mode of generated address.
 110          * @param needsScratch True if generating address needs a scatch register, false otherwise.
 111          */
 112         public AddressGenerationPlan(WorkPlan workPlan, AArch64Address.AddressingMode addressingMode, boolean needsScratch) {
 113             this.workPlan = workPlan;
 114             this.addressingMode = addressingMode;
 115             this.needsScratch = needsScratch;
 116         }
 117     }
 118 
 119     /**
 120      * Generates an addressplan for an address of the form
 121      * {@code base + displacement [+ index [<< log2(transferSize)]]} with the index register and
 122      * scaling being optional.
 123      *
 124      * @param displacement an arbitrary displacement.
 125      * @param hasIndexRegister true if the address uses an index register, false otherwise. non null
 126      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 127      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 128      * @return AddressGenerationPlan that specifies the actions necessary to generate a valid
 129      *         AArch64Address for the given parameters.
 130      */
 131     public static AddressGenerationPlan generateAddressPlan(long displacement, boolean hasIndexRegister, int transferSize) {
 132         assert transferSize == 0 || transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 133         boolean indexScaled = transferSize != 0;
 134         int log2Scale = NumUtil.log2Ceil(transferSize);
 135         long scaledDisplacement = displacement >> log2Scale;
 136         boolean displacementScalable = indexScaled && (displacement & (transferSize - 1)) == 0;
 137         if (displacement == 0) {
 138             // register offset without any work beforehand.
 139             return new AddressGenerationPlan(NO_WORK, REGISTER_OFFSET, false);
 140         } else {
 141             if (hasIndexRegister) {
 142                 if (displacementScalable) {
 143                     boolean needsScratch = !isArithmeticImmediate(scaledDisplacement);
 144                     return new AddressGenerationPlan(ADD_TO_INDEX, REGISTER_OFFSET, needsScratch);
 145                 } else {
 146                     boolean needsScratch = !isArithmeticImmediate(displacement);
 147                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 148                 }
 149             } else {
 150                 if (displacementScalable && NumUtil.isUnsignedNbit(12, scaledDisplacement)) {
 151                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_SCALED, false);
 152                 } else if (NumUtil.isSignedNbit(9, displacement)) {
 153                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_UNSCALED, false);
 154                 } else {
 155                     boolean needsScratch = !isArithmeticImmediate(displacement);
 156                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 157                 }
 158             }
 159         }
 160     }
 161 
 162     /**
 163      * Returns an AArch64Address pointing to
 164      * {@code base + displacement + index << log2(transferSize)}.
 165      *
 166      * @param base general purpose register. May not be null or the zero register.
 167      * @param displacement arbitrary displacement added to base.
 168      * @param index general purpose register. May not be null or the stack pointer.
 169      * @param signExtendIndex if true consider index register a word register that should be
 170      *            sign-extended before being added.
 171      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 172      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 173      * @param additionalReg additional register used either as a scratch register or as part of the
 174      *            final address, depending on whether allowOverwrite is true or not. May not be null
 175      *            or stackpointer.
 176      * @param allowOverwrite if true allows to change value of base or index register to generate
 177      *            address.
 178      * @return AArch64Address pointing to memory at
 179      *         {@code base + displacement + index << log2(transferSize)}.
 180      */
 181     public AArch64Address makeAddress(Register base, long displacement, Register index, boolean signExtendIndex, int transferSize, Register additionalReg, boolean allowOverwrite) {
 182         AddressGenerationPlan plan = generateAddressPlan(displacement, !index.equals(zr), transferSize);
 183         assert allowOverwrite || !zr.equals(additionalReg) || plan.workPlan == NO_WORK;
 184         assert !plan.needsScratch || !zr.equals(additionalReg);
 185         int log2Scale = NumUtil.log2Ceil(transferSize);
 186         long scaledDisplacement = displacement >> log2Scale;
 187         Register newIndex = index;
 188         Register newBase = base;
 189         int immediate;
 190         switch (plan.workPlan) {
 191             case NO_WORK:
 192                 if (plan.addressingMode == IMMEDIATE_SCALED) {
 193                     immediate = (int) scaledDisplacement;
 194                 } else {
 195                     immediate = (int) displacement;
 196                 }
 197                 break;
 198             case ADD_TO_INDEX:
 199                 newIndex = allowOverwrite ? index : additionalReg;
 200                 assert !newIndex.equals(sp) && !newIndex.equals(zr);
 201                 if (plan.needsScratch) {
 202                     mov(additionalReg, scaledDisplacement);
 203                     add(signExtendIndex ? 32 : 64, newIndex, index, additionalReg);
 204                 } else {
 205                     add(signExtendIndex ? 32 : 64, newIndex, index, (int) scaledDisplacement);
 206                 }
 207                 immediate = 0;
 208                 break;
 209             case ADD_TO_BASE:
 210                 newBase = allowOverwrite ? base : additionalReg;
 211                 assert !newBase.equals(sp) && !newBase.equals(zr);
 212                 if (plan.needsScratch) {
 213                     mov(additionalReg, displacement);
 214                     add(64, newBase, base, additionalReg);
 215                 } else {
 216                     add(64, newBase, base, (int) displacement);
 217                 }
 218                 immediate = 0;
 219                 break;
 220             default:
 221                 throw GraalError.shouldNotReachHere();
 222         }
 223         AArch64Address.AddressingMode addressingMode = plan.addressingMode;
 224         ExtendType extendType = null;
 225         if (addressingMode == REGISTER_OFFSET) {
 226             if (newIndex.equals(zr)) {
 227                 addressingMode = BASE_REGISTER_ONLY;
 228             } else if (signExtendIndex) {
 229                 addressingMode = EXTENDED_REGISTER_OFFSET;
 230                 extendType = ExtendType.SXTW;
 231             }
 232         }
 233         return AArch64Address.createAddress(addressingMode, newBase, newIndex, immediate, transferSize != 0, extendType);
 234     }
 235 
 236     /**
 237      * Returns an AArch64Address pointing to {@code base + displacement}. Specifies the memory
 238      * transfer size to allow some optimizations when building the address.
 239      *
 240      * @param base general purpose register. May not be null or the zero register.
 241      * @param displacement arbitrary displacement added to base.
 242      * @param transferSize the memory transfer size in bytes.
 243      * @param additionalReg additional register used either as a scratch register or as part of the
 244      *            final address, depending on whether allowOverwrite is true or not. May not be
 245      *            null, zero register or stackpointer.
 246      * @param allowOverwrite if true allows to change value of base or index register to generate
 247      *            address.
 248      * @return AArch64Address pointing to memory at {@code base + displacement}.
 249      */
 250     public AArch64Address makeAddress(Register base, long displacement, Register additionalReg, int transferSize, boolean allowOverwrite) {
 251         assert additionalReg.getRegisterCategory().equals(CPU);
 252         return makeAddress(base, displacement, zr, /* sign-extend */false, transferSize, additionalReg, allowOverwrite);
 253     }
 254 
 255     /**
 256      * Returns an AArch64Address pointing to {@code base + displacement}. Fails if address cannot be
 257      * represented without overwriting base register or using a scratch register.
 258      *
 259      * @param base general purpose register. May not be null or the zero register.
 260      * @param displacement arbitrary displacement added to base.
 261      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 262      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 263      * @return AArch64Address pointing to memory at {@code base + displacement}.
 264      */
 265     public AArch64Address makeAddress(Register base, long displacement, int transferSize) {
 266         return makeAddress(base, displacement, zr, /* signExtend */false, transferSize, zr, /* allowOverwrite */false);
 267     }
 268 
 269     /**
 270      * Loads memory address into register.
 271      *
 272      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 273      * @param address address whose value is loaded into dst. May not be null,
 274      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_POST_INDEXED
 275      *            POST_INDEXED} or
 276      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_PRE_INDEXED
 277      *            IMMEDIATE_PRE_INDEXED}
 278      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 279      *            the index register is scaled. Can be 1, 2, 4 or 8.
 280      */
 281     public void loadAddress(Register dst, AArch64Address address, int transferSize) {
 282         assert transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 283         assert dst.getRegisterCategory().equals(CPU);
 284         int shiftAmt = NumUtil.log2Ceil(transferSize);
 285         switch (address.getAddressingMode()) {
 286             case IMMEDIATE_SCALED:
 287                 int scaledImmediate = address.getImmediateRaw() << shiftAmt;
 288                 int lowerBits = scaledImmediate & NumUtil.getNbitNumberInt(12);
 289                 int higherBits = scaledImmediate & ~NumUtil.getNbitNumberInt(12);
 290                 boolean firstAdd = true;
 291                 if (lowerBits != 0) {
 292                     add(64, dst, address.getBase(), lowerBits);
 293                     firstAdd = false;
 294                 }
 295                 if (higherBits != 0) {
 296                     Register src = firstAdd ? address.getBase() : dst;
 297                     add(64, dst, src, higherBits);
 298                 }
 299                 break;
 300             case IMMEDIATE_UNSCALED:
 301                 int immediate = address.getImmediateRaw();
 302                 add(64, dst, address.getBase(), immediate);
 303                 break;
 304             case REGISTER_OFFSET:
 305                 add(64, dst, address.getBase(), address.getOffset(), ShiftType.LSL, address.isScaled() ? shiftAmt : 0);
 306                 break;
 307             case EXTENDED_REGISTER_OFFSET:
 308                 add(64, dst, address.getBase(), address.getOffset(), address.getExtendType(), address.isScaled() ? shiftAmt : 0);
 309                 break;
 310             case PC_LITERAL: {
 311                 addressOf(dst);
 312                 break;
 313             }
 314             case BASE_REGISTER_ONLY:
 315                 movx(dst, address.getBase());
 316                 break;
 317             default:
 318                 throw GraalError.shouldNotReachHere();
 319         }
 320     }
 321 
    /**
     * 64-bit register move: dst = src. Shorthand for {@code mov(64, dst, src)}.
     *
     * @param dst general purpose register receiving the value.
     * @param src general purpose register providing the value.
     */
    public void movx(Register dst, Register src) {
        mov(64, dst, src);
    }
 325 
 326     public void mov(int size, Register dst, Register src) {
 327         if (dst.equals(sp) || src.equals(sp)) {
 328             add(size, dst, src, 0);
 329         } else {
 330             or(size, dst, zr, src);
 331         }
 332     }
 333 
 334     /**
 335      * Generates a 64-bit immediate move code sequence.
 336      *
 337      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 338      * @param imm
 339      */
 340     private void mov64(Register dst, long imm) {
 341         // We have to move all non zero parts of the immediate in 16-bit chunks
 342         boolean firstMove = true;
 343         for (int offset = 0; offset < 64; offset += 16) {
 344             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 345             if (chunk == 0) {
 346                 continue;
 347             }
 348             if (firstMove) {
 349                 movz(64, dst, chunk, offset);
 350                 firstMove = false;
 351             } else {
 352                 movk(64, dst, chunk, offset);
 353             }
 354         }
 355         assert !firstMove;
 356     }
 357 
 358     /**
 359      * Loads immediate into register.
 360      *
 361      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 362      * @param imm immediate loaded into register.
 363      */
 364     public void mov(Register dst, long imm) {
 365         assert dst.getRegisterCategory().equals(CPU);
 366         if (imm == 0L) {
 367             movx(dst, zr);
 368         } else if (LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 369             or(64, dst, zr, imm);
 370         } else if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
 371             // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can
 372             // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and
 373             // then sign extend
 374             // them. This allows us to cover immediates like ~1L with 2 instructions.
 375             mov(dst, (int) imm);
 376             sxt(64, 32, dst, dst);
 377         } else {
 378             mov64(dst, imm);
 379         }
 380     }
 381 
 382     /**
 383      * Loads immediate into register.
 384      *
 385      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 386      * @param imm immediate loaded into register.
 387      */
 388     public void mov(Register dst, int imm) {
 389         mov(dst, imm & 0xFFFF_FFFFL);
 390     }
 391 
 392     /**
 393      * Generates a 48-bit immediate move code sequence. The immediate may later be updated by
 394      * HotSpot.
 395      *
 396      * In AArch64 mode the virtual address space is 48-bits in size, so we only need three
 397      * instructions to create a patchable instruction sequence that can reach anywhere.
 398      *
 399      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 400      * @param imm
 401      */
 402     public void movNativeAddress(Register dst, long imm) {
 403         assert (imm & 0xFFFF_0000_0000_0000L) == 0;
 404         // We have to move all non zero parts of the immediate in 16-bit chunks
 405         boolean firstMove = true;
 406         for (int offset = 0; offset < 48; offset += 16) {
 407             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 408             if (firstMove) {
 409                 movz(64, dst, chunk, offset);
 410                 firstMove = false;
 411             } else {
 412                 movk(64, dst, chunk, offset);
 413             }
 414         }
 415         assert !firstMove;
 416     }
 417 
 418     /**
 419      * Generates a 32-bit immediate move code sequence. The immediate may later be updated by
 420      * HotSpot.
 421      *
 422      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 423      * @param imm
 424      */
 425     public void movNarrowAddress(Register dst, long imm) {
 426         assert (imm & 0xFFFF_FFFF_0000_0000L) == 0;
 427         movz(64, dst, (int) (imm >>> 16), 16);
 428         movk(64, dst, (int) (imm & 0xffff), 0);
 429     }
 430 
 431     /**
 432      * @return Number of instructions necessary to load immediate into register.
 433      */
 434     public static int nrInstructionsToMoveImmediate(long imm) {
 435         if (imm == 0L || LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 436             return 1;
 437         }
 438         if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
 439             // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can
 440             // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and
 441             // then sign extend
 442             // them. This allows us to cover immediates like ~1L with 2 instructions.
 443             return 2;
 444         }
 445         int nrInstructions = 0;
 446         for (int offset = 0; offset < 64; offset += 16) {
 447             int part = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 448             if (part != 0) {
 449                 nrInstructions++;
 450             }
 451         }
 452         return nrInstructions;
 453     }
 454 
 455     /**
 456      * Loads a srcSize value from address into rt sign-extending it if necessary.
 457      *
 458      * @param targetSize size of target register in bits. Must be 32 or 64.
 459      * @param srcSize size of memory read in bits. Must be 8, 16 or 32 and smaller or equal to
 460      *            targetSize.
 461      * @param rt general purpose register. May not be null or stackpointer.
 462      * @param address all addressing modes allowed. May not be null.
 463      */
 464     @Override
 465     public void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
 466         assert targetSize == 32 || targetSize == 64;
 467         assert srcSize <= targetSize;
 468         if (targetSize == srcSize) {
 469             super.ldr(srcSize, rt, address);
 470         } else {
 471             super.ldrs(targetSize, srcSize, rt, address);
 472         }
 473     }
 474 
    /**
     * Loads a srcSize value from address into rt zero-extending it if necessary. This override
     * performs no checks of its own and delegates unchanged to the superclass implementation.
     *
     * @param srcSize size of memory read in bits.
     * @param rt general purpose register. May not be null or stackpointer.
     * @param address all addressing modes allowed. May not be null.
     */
    @Override
    public void ldr(int srcSize, Register rt, AArch64Address address) {
        super.ldr(srcSize, rt, address);
    }
 487 
    /**
     * Conditional move. result = trueValue if cond else falseValue. Emitted as a {@code csel}.
     *
     * @param size register size. Has to be 32 or 64.
     * @param result general purpose register. May not be null or the stackpointer.
     * @param trueValue general purpose register. May not be null or the stackpointer.
     * @param falseValue general purpose register. May not be null or the stackpointer.
     * @param cond any condition flag. May not be null.
     */
    public void cmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag cond) {
        super.csel(size, result, trueValue, falseValue, cond);
    }
 500 
 501     /**
 502      * Conditional set. dst = 1 if condition else 0.
 503      *
 504      * @param dst general purpose register. May not be null or stackpointer.
 505      * @param condition any condition. May not be null.
 506      */
 507     public void cset(int size, Register dst, ConditionFlag condition) {
 508         super.csinc(size, dst, zr, zr, condition.negate());
 509     }
 510 
 511     /**
 512      * dst = src1 + src2.
 513      *
 514      * @param size register size. Has to be 32 or 64.
 515      * @param dst general purpose register. May not be null.
 516      * @param src1 general purpose register. May not be null.
 517      * @param src2 general purpose register. May not be null or stackpointer.
 518      */
 519     public void add(int size, Register dst, Register src1, Register src2) {
 520         if (dst.equals(sp) || src1.equals(sp)) {
 521             super.add(size, dst, src1, src2, ExtendType.UXTX, 0);
 522         } else {
 523             super.add(size, dst, src1, src2, ShiftType.LSL, 0);
 524         }
 525     }
 526 
 527     /**
 528      * dst = src1 + src2 and sets condition flags.
 529      *
 530      * @param size register size. Has to be 32 or 64.
 531      * @param dst general purpose register. May not be null.
 532      * @param src1 general purpose register. May not be null.
 533      * @param src2 general purpose register. May not be null or stackpointer.
 534      */
 535     public void adds(int size, Register dst, Register src1, Register src2) {
 536         if (dst.equals(sp) || src1.equals(sp)) {
 537             super.adds(size, dst, src1, src2, ExtendType.UXTX, 0);
 538         } else {
 539             super.adds(size, dst, src1, src2, ShiftType.LSL, 0);
 540         }
 541     }
 542 
 543     /**
 544      * dst = src1 - src2 and sets condition flags.
 545      *
 546      * @param size register size. Has to be 32 or 64.
 547      * @param dst general purpose register. May not be null.
 548      * @param src1 general purpose register. May not be null.
 549      * @param src2 general purpose register. May not be null or stackpointer.
 550      */
 551     public void subs(int size, Register dst, Register src1, Register src2) {
 552         if (dst.equals(sp) || src1.equals(sp)) {
 553             super.subs(size, dst, src1, src2, ExtendType.UXTX, 0);
 554         } else {
 555             super.subs(size, dst, src1, src2, ShiftType.LSL, 0);
 556         }
 557     }
 558 
 559     /**
 560      * dst = src1 - src2.
 561      *
 562      * @param size register size. Has to be 32 or 64.
 563      * @param dst general purpose register. May not be null.
 564      * @param src1 general purpose register. May not be null.
 565      * @param src2 general purpose register. May not be null or stackpointer.
 566      */
 567     public void sub(int size, Register dst, Register src1, Register src2) {
 568         if (dst.equals(sp) || src1.equals(sp)) {
 569             super.sub(size, dst, src1, src2, ExtendType.UXTX, 0);
 570         } else {
 571             super.sub(size, dst, src1, src2, ShiftType.LSL, 0);
 572         }
 573     }
 574 
 575     /**
 576      * dst = src1 + shiftType(src2, shiftAmt & (size - 1)).
 577      *
 578      * @param size register size. Has to be 32 or 64.
 579      * @param dst general purpose register. May not be null or stackpointer.
 580      * @param src1 general purpose register. May not be null or stackpointer.
 581      * @param src2 general purpose register. May not be null or stackpointer.
 582      * @param shiftType any type but ROR.
 583      * @param shiftAmt arbitrary shift amount.
 584      */
 585     @Override
 586     public void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 587         int shift = clampShiftAmt(size, shiftAmt);
 588         super.add(size, dst, src1, src2, shiftType, shift);
 589     }
 590 
 591     /**
 592      * dst = src1 + shiftType(src2, shiftAmt & (size-1)) and sets condition flags.
 593      *
 594      * @param size register size. Has to be 32 or 64.
 595      * @param dst general purpose register. May not be null or stackpointer.
 596      * @param src1 general purpose register. May not be null or stackpointer.
 597      * @param src2 general purpose register. May not be null or stackpointer.
 598      * @param shiftType any type but ROR.
 599      * @param shiftAmt arbitrary shift amount.
 600      */
 601     @Override
 602     public void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 603         int shift = clampShiftAmt(size, shiftAmt);
 604         super.sub(size, dst, src1, src2, shiftType, shift);
 605     }
 606 
    /**
     * dst = -src. Implemented as a subtraction of src from the zero register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src general purpose register. May not be null or stackpointer.
     */
    public void neg(int size, Register dst, Register src) {
        sub(size, dst, zr, src);
    }
 617 
 618     /**
 619      * dst = src + immediate.
 620      *
 621      * @param size register size. Has to be 32 or 64.
 622      * @param dst general purpose register. May not be null or zero-register.
 623      * @param src general purpose register. May not be null or zero-register.
 624      * @param immediate 32-bit signed int
 625      */
 626     @Override
 627     public void add(int size, Register dst, Register src, int immediate) {
 628         assert (!dst.equals(zr) && !src.equals(zr));
 629         if (immediate < 0) {
 630             sub(size, dst, src, -immediate);
 631         } else if (isAimm(immediate)) {
 632             if (!(dst.equals(src) && immediate == 0)) {
 633                 super.add(size, dst, src, immediate);
 634             }
 635         } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) {
 636             super.add(size, dst, src, immediate & -(1 << 12));
 637             super.add(size, dst, dst, immediate & ((1 << 12) - 1));
 638         } else {
 639             assert !dst.equals(src);
 640             mov(dst, immediate);
 641             add(size, src, dst, dst);
 642         }
 643     }
 644 
 645     /**
 646      * dst = src + immediate.
 647      *
 648      * @param size register size. Has to be 32 or 64.
 649      * @param dst general purpose register. May not be null or zero-register.
 650      * @param src general purpose register. May not be null or zero-register.
 651      * @param immediate 64-bit signed int
 652      */
 653     public void add(int size, Register dst, Register src, long immediate) {
 654         if (NumUtil.isInt(immediate)) {
 655             add(size, dst, src, (int) immediate);
 656         } else {
 657             assert (!dst.equals(zr) && !src.equals(zr));
 658             assert !dst.equals(src);
 659             assert size == 64;
 660             mov(dst, immediate);
 661             add(size, src, dst, dst);
 662         }
 663     }
 664 
 665     /**
 666      * dst = src + aimm and sets condition flags.
 667      *
 668      * @param size register size. Has to be 32 or 64.
 669      * @param dst general purpose register. May not be null or stackpointer.
 670      * @param src general purpose register. May not be null or zero-register.
 671      * @param immediate arithmetic immediate.
 672      */
 673     @Override
 674     public void adds(int size, Register dst, Register src, int immediate) {
 675         assert (!dst.equals(sp) && !src.equals(zr));
 676         if (immediate < 0) {
 677             subs(size, dst, src, -immediate);
 678         } else if (!(dst.equals(src) && immediate == 0)) {
 679             super.adds(size, dst, src, immediate);
 680         }
 681     }
 682 
 683     /**
 684      * dst = src - immediate.
 685      *
 686      * @param size register size. Has to be 32 or 64.
 687      * @param dst general purpose register. May not be null or zero-register.
 688      * @param src general purpose register. May not be null or zero-register.
 689      * @param immediate 32-bit signed int
 690      */
 691     @Override
 692     public void sub(int size, Register dst, Register src, int immediate) {
 693         assert (!dst.equals(zr) && !src.equals(zr));
 694         if (immediate < 0) {
 695             add(size, dst, src, -immediate);
 696         } else if (isAimm(immediate)) {
 697             if (!(dst.equals(src) && immediate == 0)) {
 698                 super.sub(size, dst, src, immediate);
 699             }
 700         } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) {
 701             super.sub(size, dst, src, immediate & -(1 << 12));
 702             super.sub(size, dst, dst, immediate & ((1 << 12) - 1));
 703         } else {
 704             assert !dst.equals(src);
 705             mov(dst, immediate);
 706             sub(size, src, dst, dst);
 707         }
 708     }
 709 
 710     /**
 711      * dst = src - aimm and sets condition flags.
 712      *
 713      * @param size register size. Has to be 32 or 64.
 714      * @param dst general purpose register. May not be null or stackpointer.
 715      * @param src general purpose register. May not be null or zero-register.
 716      * @param immediate arithmetic immediate.
 717      */
 718     @Override
 719     public void subs(int size, Register dst, Register src, int immediate) {
 720         assert (!dst.equals(sp) && !src.equals(zr));
 721         if (immediate < 0) {
 722             adds(size, dst, src, -immediate);
 723         } else if (!dst.equals(src) || immediate != 0) {
 724             super.subs(size, dst, src, immediate);
 725         }
 726     }
 727 
 728     /**
 729      * dst = src1 * src2.
 730      *
 731      * @param size register size. Has to be 32 or 64.
 732      * @param dst general purpose register. May not be null or the stackpointer.
 733      * @param src1 general purpose register. May not be null or the stackpointer.
 734      * @param src2 general purpose register. May not be null or the stackpointer.
 735      */
 736     public void mul(int size, Register dst, Register src1, Register src2) {
 737         super.madd(size, dst, src1, src2, zr);
 738     }
 739 
 740     /**
 741      * dst = src3 + src1 * src2.
 742      *
 743      * @param size register size. Has to be 32 or 64.
 744      * @param dst general purpose register. May not be null or the stackpointer.
 745      * @param src1 general purpose register. May not be null or the stackpointer.
 746      * @param src2 general purpose register. May not be null or the stackpointer.
 747      * @param src3 general purpose register. May not be null or the stackpointer.
 748      */
 749     @Override
 750     public void madd(int size, Register dst, Register src1, Register src2, Register src3) {
 751         super.madd(size, dst, src1, src2, src3);
 752     }
 753 
 754     /**
 755      * dst = src3 - src1 * src2.
 756      *
 757      * @param size register size. Has to be 32 or 64.
 758      * @param dst general purpose register. May not be null or the stackpointer.
 759      * @param src1 general purpose register. May not be null or the stackpointer.
 760      * @param src2 general purpose register. May not be null or the stackpointer.
 761      * @param src3 general purpose register. May not be null or the stackpointer.
 762      */
 763     @Override
 764     public void msub(int size, Register dst, Register src1, Register src2, Register src3) {
 765         super.msub(size, dst, src1, src2, src3);
 766     }
 767 
 768     /**
 769      * dst = 0 - src1 * src2.
 770      *
 771      * @param size register size. Has to be 32 or 64.
 772      * @param dst general purpose register. May not be null or the stackpointer.
 773      * @param src1 general purpose register. May not be null or the stackpointer.
 774      * @param src2 general purpose register. May not be null or the stackpointer.
 775      */
 776     public void mneg(int size, Register dst, Register src1, Register src2) {
 777         super.msub(size, dst, src1, src2, zr);
 778     }
 779 
 780     /**
 781      * unsigned multiply high. dst = (src1 * src2) >> size
 782      *
 783      * @param size register size. Has to be 32 or 64.
 784      * @param dst general purpose register. May not be null or the stackpointer.
 785      * @param src1 general purpose register. May not be null or the stackpointer.
 786      * @param src2 general purpose register. May not be null or the stackpointer.
 787      */
 788     public void umulh(int size, Register dst, Register src1, Register src2) {
 789         assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp));
 790         assert size == 32 || size == 64;
 791         if (size == 64) {
 792             super.umulh(dst, src1, src2);
 793         } else {
 794             // xDst = wSrc1 * wSrc2
 795             super.umaddl(dst, src1, src2, zr);
 796             // xDst = xDst >> 32
 797             lshr(64, dst, dst, 32);
 798         }
 799     }
 800 
 801     /**
 802      * signed multiply high. dst = (src1 * src2) >> size
 803      *
 804      * @param size register size. Has to be 32 or 64.
 805      * @param dst general purpose register. May not be null or the stackpointer.
 806      * @param src1 general purpose register. May not be null or the stackpointer.
 807      * @param src2 general purpose register. May not be null or the stackpointer.
 808      */
 809     public void smulh(int size, Register dst, Register src1, Register src2) {
 810         assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp));
 811         assert size == 32 || size == 64;
 812         if (size == 64) {
 813             super.smulh(dst, src1, src2);
 814         } else {
 815             // xDst = wSrc1 * wSrc2
 816             super.smaddl(dst, src1, src2, zr);
 817             // xDst = xDst >> 32
 818             lshr(64, dst, dst, 32);
 819         }
 820     }
 821 
 822     /**
 823      * dst = src1 % src2. Signed.
 824      *
 825      * @param size register size. Has to be 32 or 64.
 826      * @param dst general purpose register. May not be null or the stackpointer.
 827      * @param n numerator. General purpose register. May not be null or the stackpointer.
 828      * @param d denominator. General purpose register. Divisor May not be null or the stackpointer.
 829      */
 830     public void rem(int size, Register dst, Register n, Register d) {
 831         assert (!dst.equals(sp) && !n.equals(sp) && !d.equals(sp));
 832         // There is no irem or similar instruction. Instead we use the relation:
 833         // n % d = n - Floor(n / d) * d if nd >= 0
 834         // n % d = n - Ceil(n / d) * d else
 835         // Which is equivalent to n - TruncatingDivision(n, d) * d
 836         super.sdiv(size, dst, n, d);
 837         super.msub(size, dst, dst, d, n);
 838     }
 839 
 840     /**
 841      * dst = src1 % src2. Unsigned.
 842      *
 843      * @param size register size. Has to be 32 or 64.
 844      * @param dst general purpose register. May not be null or the stackpointer.
 845      * @param n numerator. General purpose register. May not be null or the stackpointer.
 846      * @param d denominator. General purpose register. Divisor May not be null or the stackpointer.
 847      */
 848     public void urem(int size, Register dst, Register n, Register d) {
 849         // There is no irem or similar instruction. Instead we use the relation:
 850         // n % d = n - Floor(n / d) * d
 851         // Which is equivalent to n - TruncatingDivision(n, d) * d
 852         super.udiv(size, dst, n, d);
 853         super.msub(size, dst, dst, d, n);
 854     }
 855 
 856     /**
 857      * Add/subtract instruction encoding supports 12-bit immediate values.
 858      *
 859      * @param imm immediate value to be tested.
 860      * @return true if immediate can be used directly for arithmetic instructions (add/sub), false
 861      *         otherwise.
 862      */
 863     public static boolean isArithmeticImmediate(long imm) {
 864         // If we have a negative immediate we just use the opposite operator. I.e.: x - (-5) == x +
 865         // 5.
 866         return NumUtil.isInt(Math.abs(imm)) && isAimm((int) Math.abs(imm));
 867     }
 868 
 869     /**
 870      * Compare instructions are add/subtract instructions and so support 12-bit immediate values.
 871      *
 872      * @param imm immediate value to be tested.
 873      * @return true if immediate can be used directly with comparison instructions, false otherwise.
 874      */
 875     public static boolean isComparisonImmediate(long imm) {
 876         return isArithmeticImmediate(imm);
 877     }
 878 
 879     /**
 880      * Move wide immediate instruction encoding supports 16-bit immediate values which can be
 881      * optionally-shifted by multiples of 16 (i.e. 0, 16, 32, 48).
 882      *
 883      * @return true if immediate can be moved directly into a register, false otherwise.
 884      */
 885     public static boolean isMovableImmediate(long imm) {
 886         // // Positions of first, respectively last set bit.
 887         // int start = Long.numberOfTrailingZeros(imm);
 888         // int end = 64 - Long.numberOfLeadingZeros(imm);
 889         // int length = end - start;
 890         // if (length > 16) {
 891         // return false;
 892         // }
 893         // // We can shift the necessary part of the immediate (i.e. everything between the first
 894         // and
 895         // // last set bit) by as much as 16 - length around to arrive at a valid shift amount
 896         // int tolerance = 16 - length;
 897         // int prevMultiple = NumUtil.roundDown(start, 16);
 898         // int nextMultiple = NumUtil.roundUp(start, 16);
 899         // return start - prevMultiple <= tolerance || nextMultiple - start <= tolerance;
 900         /*
 901          * This is a bit optimistic because the constant could also be for an arithmetic instruction
 902          * which only supports 12-bits. That case needs to be handled in the backend.
 903          */
 904         return NumUtil.isInt(Math.abs(imm)) && NumUtil.isUnsignedNbit(16, (int) Math.abs(imm));
 905     }
 906 
 907     /**
 908      * dst = src << (shiftAmt & (size - 1)).
 909      *
 910      * @param size register size. Has to be 32 or 64.
 911      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 912      * @param src general purpose register. May not be null, stackpointer or zero-register.
 913      * @param shiftAmt amount by which src is shifted.
 914      */
 915     public void shl(int size, Register dst, Register src, long shiftAmt) {
 916         int shift = clampShiftAmt(size, shiftAmt);
 917         super.ubfm(size, dst, src, (size - shift) & (size - 1), size - 1 - shift);
 918     }
 919 
 920     /**
 921      * dst = src1 << (src2 & (size - 1)).
 922      *
 923      * @param size register size. Has to be 32 or 64.
 924      * @param dst general purpose register. May not be null or stackpointer.
 925      * @param src general purpose register. May not be null or stackpointer.
 926      * @param shift general purpose register. May not be null or stackpointer.
 927      */
 928     public void shl(int size, Register dst, Register src, Register shift) {
 929         super.lsl(size, dst, src, shift);
 930     }
 931 
 932     /**
 933      * dst = src >>> (shiftAmt & (size - 1)).
 934      *
 935      * @param size register size. Has to be 32 or 64.
 936      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 937      * @param src general purpose register. May not be null, stackpointer or zero-register.
 938      * @param shiftAmt amount by which src is shifted.
 939      */
 940     public void lshr(int size, Register dst, Register src, long shiftAmt) {
 941         int shift = clampShiftAmt(size, shiftAmt);
 942         super.ubfm(size, dst, src, shift, size - 1);
 943     }
 944 
 945     /**
 946      * dst = src1 >>> (src2 & (size - 1)).
 947      *
 948      * @param size register size. Has to be 32 or 64.
 949      * @param dst general purpose register. May not be null or stackpointer.
 950      * @param src general purpose register. May not be null or stackpointer.
 951      * @param shift general purpose register. May not be null or stackpointer.
 952      */
 953     public void lshr(int size, Register dst, Register src, Register shift) {
 954         super.lsr(size, dst, src, shift);
 955     }
 956 
 957     /**
 958      * dst = src >> (shiftAmt & log2(size)).
 959      *
 960      * @param size register size. Has to be 32 or 64.
 961      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 962      * @param src general purpose register. May not be null, stackpointer or zero-register.
 963      * @param shiftAmt amount by which src is shifted.
 964      */
 965     public void ashr(int size, Register dst, Register src, long shiftAmt) {
 966         int shift = clampShiftAmt(size, shiftAmt);
 967         super.sbfm(size, dst, src, shift, size - 1);
 968     }
 969 
 970     /**
 971      * dst = src1 >> (src2 & log2(size)).
 972      *
 973      * @param size register size. Has to be 32 or 64.
 974      * @param dst general purpose register. May not be null or stackpointer.
 975      * @param src general purpose register. May not be null or stackpointer.
 976      * @param shift general purpose register. May not be null or stackpointer.
 977      */
 978     public void ashr(int size, Register dst, Register src, Register shift) {
 979         super.asr(size, dst, src, shift);
 980     }
 981 
 982     /**
 983      * Clamps shiftAmt into range 0 <= shiftamt < size according to JLS.
 984      *
 985      * @param size size of operation.
 986      * @param shiftAmt arbitrary shift amount.
 987      * @return value between 0 and size - 1 inclusive that is equivalent to shiftAmt according to
 988      *         JLS.
 989      */
 990     private static int clampShiftAmt(int size, long shiftAmt) {
 991         return (int) (shiftAmt & (size - 1));
 992     }
 993 
 994     /**
 995      * dst = src1 & src2.
 996      *
 997      * @param size register size. Has to be 32 or 64.
 998      * @param dst general purpose register. May not be null or stackpointer.
 999      * @param src1 general purpose register. May not be null or stackpointer.
1000      * @param src2 general purpose register. May not be null or stackpointer.
1001      */
1002     public void and(int size, Register dst, Register src1, Register src2) {
1003         super.and(size, dst, src1, src2, ShiftType.LSL, 0);
1004     }
1005 
1006     /**
1007      * dst = src1 ^ src2.
1008      *
1009      * @param size register size. Has to be 32 or 64.
1010      * @param dst general purpose register. May not be null or stackpointer.
1011      * @param src1 general purpose register. May not be null or stackpointer.
1012      * @param src2 general purpose register. May not be null or stackpointer.
1013      */
1014     public void eor(int size, Register dst, Register src1, Register src2) {
1015         super.eor(size, dst, src1, src2, ShiftType.LSL, 0);
1016     }
1017 
1018     /**
1019      * dst = src1 | src2.
1020      *
1021      * @param size register size. Has to be 32 or 64.
1022      * @param dst general purpose register. May not be null or stackpointer.
1023      * @param src1 general purpose register. May not be null or stackpointer.
1024      * @param src2 general purpose register. May not be null or stackpointer.
1025      */
1026     public void or(int size, Register dst, Register src1, Register src2) {
1027         super.orr(size, dst, src1, src2, ShiftType.LSL, 0);
1028     }
1029 
1030     /**
1031      * dst = src | bimm.
1032      *
1033      * @param size register size. Has to be 32 or 64.
1034      * @param dst general purpose register. May not be null or zero-register.
1035      * @param src general purpose register. May not be null or stack-pointer.
1036      * @param bimm logical immediate. See {@link AArch64Assembler.LogicalImmediateTable} for exact
1037      *            definition.
1038      */
1039     public void or(int size, Register dst, Register src, long bimm) {
1040         super.orr(size, dst, src, bimm);
1041     }
1042 
1043     /**
1044      * dst = ~src.
1045      *
1046      * @param size register size. Has to be 32 or 64.
1047      * @param dst general purpose register. May not be null or stackpointer.
1048      * @param src general purpose register. May not be null or stackpointer.
1049      */
1050     public void not(int size, Register dst, Register src) {
1051         super.orn(size, dst, zr, src, ShiftType.LSL, 0);
1052     }
1053 
1054     /**
1055      * dst = src1 & shiftType(src2, imm).
1056      *
1057      * @param size register size. Has to be 32 or 64.
1058      * @param dst general purpose register. May not be null or stackpointer.
1059      * @param src1 general purpose register. May not be null or stackpointer.
1060      * @param src2 general purpose register. May not be null or stackpointer.
1061      * @param shiftType all types allowed, may not be null.
1062      * @param shiftAmt must be in range 0 to size - 1.
1063      */
1064     @Override
1065     public void and(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1066         super.and(size, dst, src1, src2, shiftType, shiftAmt);
1067     }
1068 
1069     /**
1070      * dst = src1 ^ shiftType(src2, imm).
1071      *
1072      * @param size register size. Has to be 32 or 64.
1073      * @param dst general purpose register. May not be null or stackpointer.
1074      * @param src1 general purpose register. May not be null or stackpointer.
1075      * @param src2 general purpose register. May not be null or stackpointer.
1076      * @param shiftType all types allowed, may not be null.
1077      * @param shiftAmt must be in range 0 to size - 1.
1078      */
1079     @Override
1080     public void eor(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1081         super.eor(size, dst, src1, src2, shiftType, shiftAmt);
1082     }
1083 
1084     /**
1085      * dst = src1 | shiftType(src2, imm).
1086      *
1087      * @param size register size. Has to be 32 or 64.
1088      * @param dst general purpose register. May not be null or stackpointer.
1089      * @param src1 general purpose register. May not be null or stackpointer.
1090      * @param src2 general purpose register. May not be null or stackpointer.
1091      * @param shiftType all types allowed, may not be null.
1092      * @param shiftAmt must be in range 0 to size - 1.
1093      */
1094     public void or(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1095         super.orr(size, dst, src1, src2, shiftType, shiftAmt);
1096     }
1097 
1098     /**
1099      * dst = src1 & ~(shiftType(src2, imm)).
1100      *
1101      * @param size register size. Has to be 32 or 64.
1102      * @param dst general purpose register. May not be null or stackpointer.
1103      * @param src1 general purpose register. May not be null or stackpointer.
1104      * @param src2 general purpose register. May not be null or stackpointer.
1105      * @param shiftType all types allowed, may not be null.
1106      * @param shiftAmt must be in range 0 to size - 1.
1107      */
1108     @Override
1109     public void bic(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1110         super.bic(size, dst, src1, src2, shiftType, shiftAmt);
1111     }
1112 
1113     /**
1114      * dst = src1 ^ ~(shiftType(src2, imm)).
1115      *
1116      * @param size register size. Has to be 32 or 64.
1117      * @param dst general purpose register. May not be null or stackpointer.
1118      * @param src1 general purpose register. May not be null or stackpointer.
1119      * @param src2 general purpose register. May not be null or stackpointer.
1120      * @param shiftType all types allowed, may not be null.
1121      * @param shiftAmt must be in range 0 to size - 1.
1122      */
1123     @Override
1124     public void eon(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1125         super.eon(size, dst, src1, src2, shiftType, shiftAmt);
1126     }
1127 
1128     /**
1129      * dst = src1 | ~(shiftType(src2, imm)).
1130      *
1131      * @param size register size. Has to be 32 or 64.
1132      * @param dst general purpose register. May not be null or stackpointer.
1133      * @param src1 general purpose register. May not be null or stackpointer.
1134      * @param src2 general purpose register. May not be null or stackpointer.
1135      * @param shiftType all types allowed, may not be null.
1136      * @param shiftAmt must be in range 0 to size - 1.
1137      */
1138     @Override
1139     public void orn(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1140         super.orn(size, dst, src1, src2, shiftType, shiftAmt);
1141     }
1142 
1143     /**
1144      * Sign-extend value from src into dst.
1145      *
1146      * @param destSize destination register size. Must be 32 or 64.
1147      * @param srcSize source register size. Must be smaller than destSize.
1148      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1149      * @param src general purpose register. May not be null, stackpointer or zero-register.
1150      */
1151     public void sxt(int destSize, int srcSize, Register dst, Register src) {
1152         assert (srcSize < destSize && srcSize > 0);
1153         super.sbfm(destSize, dst, src, 0, srcSize - 1);
1154     }
1155 
1156     /**
1157      * dst = src if condition else -src.
1158      *
1159      * @param size register size. Must be 32 or 64.
1160      * @param dst general purpose register. May not be null or the stackpointer.
1161      * @param src general purpose register. May not be null or the stackpointer.
1162      * @param condition any condition except AV or NV. May not be null.
1163      */
1164     public void csneg(int size, Register dst, Register src, ConditionFlag condition) {
1165         super.csneg(size, dst, src, src, condition.negate());
1166     }
1167 
1168     /**
1169      * @return True if the immediate can be used directly for logical 64-bit instructions.
1170      */
1171     public static boolean isLogicalImmediate(long imm) {
1172         return LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO;
1173     }
1174 
1175     /**
1176      * @return True if the immediate can be used directly for logical 32-bit instructions.
1177      */
1178     public static boolean isLogicalImmediate(int imm) {
1179         return LogicalImmediateTable.isRepresentable(imm) == LogicalImmediateTable.Representable.YES;
1180     }
1181 
1182     /* Float instructions */
1183 
1184     /**
1185      * Moves integer to float, float to integer, or float to float. Does not support integer to
1186      * integer moves.
1187      *
1188      * @param size register size. Has to be 32 or 64.
1189      * @param dst Either floating-point or general-purpose register. If general-purpose register may
1190      *            not be stackpointer or zero register. Cannot be null in any case.
1191      * @param src Either floating-point or general-purpose register. If general-purpose register may
1192      *            not be stackpointer. Cannot be null in any case.
1193      */
1194     @Override
1195     public void fmov(int size, Register dst, Register src) {
1196         assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU)) : "src and dst cannot both be integer registers.";
1197         if (dst.getRegisterCategory().equals(CPU)) {
1198             super.fmovFpu2Cpu(size, dst, src);
1199         } else if (src.getRegisterCategory().equals(CPU)) {
1200             super.fmovCpu2Fpu(size, dst, src);
1201         } else {
1202             super.fmov(size, dst, src);
1203         }
1204     }
1205 
1206     /**
1207      *
1208      * @param size register size. Has to be 32 or 64.
1209      * @param dst floating point register. May not be null.
1210      * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be
1211      *            loaded, i.e. (float) imm == imm must be true. In all cases
1212      *            {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true
1213      *            depending on size.
1214      */
1215     @Override
1216     public void fmov(int size, Register dst, double imm) {
1217         if (imm == 0.0) {
1218             assert Double.doubleToRawLongBits(imm) == 0L : "-0.0 is no valid immediate.";
1219             super.fmovCpu2Fpu(size, dst, zr);
1220         } else {
1221             super.fmov(size, dst, imm);
1222         }
1223     }
1224 
1225     /**
1226      *
1227      * @return true if immediate can be loaded directly into floating-point register, false
1228      *         otherwise.
1229      */
1230     public static boolean isDoubleImmediate(double imm) {
1231         return Double.doubleToRawLongBits(imm) == 0L || AArch64Assembler.isDoubleImmediate(imm);
1232     }
1233 
1234     /**
1235      *
1236      * @return true if immediate can be loaded directly into floating-point register, false
1237      *         otherwise.
1238      */
1239     public static boolean isFloatImmediate(float imm) {
1240         return Float.floatToRawIntBits(imm) == 0 || AArch64Assembler.isFloatImmediate(imm);
1241     }
1242 
1243     /**
1244      * Conditional move. dst = src1 if condition else src2.
1245      *
1246      * @param size register size.
1247      * @param result floating point register. May not be null.
1248      * @param trueValue floating point register. May not be null.
1249      * @param falseValue floating point register. May not be null.
1250      * @param condition every condition allowed. May not be null.
1251      */
1252     public void fcmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag condition) {
1253         super.fcsel(size, result, trueValue, falseValue, condition);
1254     }
1255 
1256     /**
1257      * dst = src1 % src2.
1258      *
1259      * @param size register size. Has to be 32 or 64.
1260      * @param dst floating-point register. May not be null.
1261      * @param n numerator. Floating-point register. May not be null.
1262      * @param d denominator. Floating-point register. May not be null.
1263      */
1264     public void frem(int size, Register dst, Register n, Register d) {
1265         // There is no frem instruction, instead we compute the remainder using the relation:
1266         // rem = n - Truncating(n / d) * d
1267         super.fdiv(size, dst, n, d);
1268         super.frintz(size, dst, dst);
1269         super.fmsub(size, dst, dst, d, n);
1270     }
1271 
1272     /* Branches */
1273 
1274     /**
1275      * Compares x and y and sets condition flags.
1276      *
1277      * @param size register size. Has to be 32 or 64.
1278      * @param x general purpose register. May not be null or stackpointer.
1279      * @param y general purpose register. May not be null or stackpointer.
1280      */
1281     public void cmp(int size, Register x, Register y) {
1282         assert size == 32 || size == 64;
1283         super.subs(size, zr, x, y, ShiftType.LSL, 0);
1284     }
1285 
1286     /**
1287      * Compares x to y and sets condition flags.
1288      *
1289      * @param size register size. Has to be 32 or 64.
1290      * @param x general purpose register. May not be null or stackpointer.
1291      * @param y comparison immediate, {@link #isComparisonImmediate(long)} has to be true for it.
1292      */
1293     public void cmp(int size, Register x, int y) {
1294         assert size == 32 || size == 64;
1295         if (y < 0) {
1296             super.adds(size, zr, x, -y);
1297         } else {
1298             super.subs(size, zr, x, y);
1299         }
1300     }
1301 
1302     /**
1303      * Sets condition flags according to result of x & y.
1304      *
1305      * @param size register size. Has to be 32 or 64.
1306      * @param dst general purpose register. May not be null or stack-pointer.
1307      * @param x general purpose register. May not be null or stackpointer.
1308      * @param y general purpose register. May not be null or stackpointer.
1309      */
1310     public void ands(int size, Register dst, Register x, Register y) {
1311         super.ands(size, dst, x, y, ShiftType.LSL, 0);
1312     }
1313 
1314     /**
1315      * Sets overflow flag according to result of x * y.
1316      *
1317      * @param size register size. Has to be 32 or 64.
1318      * @param dst general purpose register. May not be null or stack-pointer.
1319      * @param x general purpose register. May not be null or stackpointer.
1320      * @param y general purpose register. May not be null or stackpointer.
1321      */
1322     public void mulvs(int size, Register dst, Register x, Register y) {
1323         try (ScratchRegister sc1 = getScratchRegister();
1324                         ScratchRegister sc2 = getScratchRegister()) {
1325             switch (size) {
1326                 case 64: {
1327                     // Be careful with registers: it's possible that x, y, and dst are the same
1328                     // register.
1329                     Register rscratch1 = sc1.getRegister();
1330                     Register rscratch2 = sc2.getRegister();
1331                     mul(64, rscratch1, x, y);     // Result bits 0..63
1332                     smulh(64, rscratch2, x, y);  // Result bits 64..127
1333                     // Top is pure sign ext
1334                     subs(64, zr, rscratch2, rscratch1, ShiftType.ASR, 63);
1335                     // Copy all 64 bits of the result into dst
1336                     mov(64, dst, rscratch1);
1337                     mov(rscratch1, 0x80000000);
1338                     // Develop 0 (EQ), or 0x80000000 (NE)
1339                     cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE);
1340                     cmp(32, rscratch1, 1);
1341                     // 0x80000000 - 1 => VS
1342                     break;
1343                 }
1344                 case 32: {
1345                     Register rscratch1 = sc1.getRegister();
1346                     smaddl(rscratch1, x, y, zr);
1347                     // Copy the low 32 bits of the result into dst
1348                     mov(32, dst, rscratch1);
1349                     subs(64, zr, rscratch1, rscratch1, ExtendType.SXTW, 0);
1350                     // NE => overflow
1351                     mov(rscratch1, 0x80000000);
1352                     // Develop 0 (EQ), or 0x80000000 (NE)
1353                     cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE);
1354                     cmp(32, rscratch1, 1);
1355                     // 0x80000000 - 1 => VS
1356                     break;
1357                 }
1358             }
1359         }
1360     }
1361 
1362     /**
1363      * When patching up Labels we have to know what kind of code to generate.
1364      */
1365     public enum PatchLabelKind {
1366         BRANCH_CONDITIONALLY(0x0),
1367         BRANCH_UNCONDITIONALLY(0x1),
1368         BRANCH_NONZERO(0x2),
1369         BRANCH_ZERO(0x3),
1370         BRANCH_BIT_NONZERO(0x4),
1371         BRANCH_BIT_ZERO(0x5),
1372         JUMP_ADDRESS(0x6),
1373         ADR(0x7);
1374 
1375         /**
1376          * Offset by which additional information for branch conditionally, branch zero and branch
1377          * non zero has to be shifted.
1378          */
1379         public static final int INFORMATION_OFFSET = 5;
1380 
1381         public final int encoding;
1382 
1383         PatchLabelKind(int encoding) {
1384             this.encoding = encoding;
1385         }
1386 
1387         /**
1388          * @return PatchLabelKind with given encoding.
1389          */
1390         private static PatchLabelKind fromEncoding(int encoding) {
1391             return values()[encoding & NumUtil.getNbitNumberInt(INFORMATION_OFFSET)];
1392         }
1393 
1394     }
1395 
1396     public void adr(Register dst, Label label) {
1397         // TODO Handle case where offset is too large for a single jump instruction
1398         if (label.isBound()) {
1399             int offset = label.position() - position();
1400             super.adr(dst, offset);
1401         } else {
1402             label.addPatchAt(position());
1403             // Encode condition flag so that we know how to patch the instruction later
1404             emitInt(PatchLabelKind.ADR.encoding | dst.encoding << PatchLabelKind.INFORMATION_OFFSET);
1405         }
1406     }
1407 
1408     /**
1409      * Compare register and branch if non-zero.
1410      *
1411      * @param size Instruction size in bits. Should be either 32 or 64.
1412      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1413      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1414      */
1415     public void cbnz(int size, Register cmp, Label label) {
1416         // TODO Handle case where offset is too large for a single jump instruction
1417         if (label.isBound()) {
1418             int offset = label.position() - position();
1419             super.cbnz(size, cmp, offset);
1420         } else {
1421             label.addPatchAt(position());
1422             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1423             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1424             // Encode condition flag so that we know how to patch the instruction later
1425             emitInt(PatchLabelKind.BRANCH_NONZERO.encoding | regEncoding | sizeEncoding);
1426         }
1427     }
1428 
1429     /**
1430      * Compare register and branch if zero.
1431      *
1432      * @param size Instruction size in bits. Should be either 32 or 64.
1433      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1434      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1435      */
1436     public void cbz(int size, Register cmp, Label label) {
1437         // TODO Handle case where offset is too large for a single jump instruction
1438         if (label.isBound()) {
1439             int offset = label.position() - position();
1440             super.cbz(size, cmp, offset);
1441         } else {
1442             label.addPatchAt(position());
1443             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1444             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1445             // Encode condition flag so that we know how to patch the instruction later
1446             emitInt(PatchLabelKind.BRANCH_ZERO.encoding | regEncoding | sizeEncoding);
1447         }
1448     }
1449 
1450     /**
1451      * Test a single bit and branch if the bit is nonzero.
1452      *
1453      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1454      * @param uimm6 Unsigned 6-bit bit index.
1455      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1456      */
1457     public void tbnz(Register cmp, int uimm6, Label label) {
1458         assert NumUtil.isUnsignedNbit(6, uimm6);
1459         if (label.isBound()) {
1460             int offset = label.position() - position();
1461             super.tbnz(cmp, uimm6, offset);
1462         } else {
1463             label.addPatchAt(position());
1464             int indexEncoding = uimm6 << PatchLabelKind.INFORMATION_OFFSET;
1465             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 6);
1466             emitInt(PatchLabelKind.BRANCH_BIT_NONZERO.encoding | indexEncoding | regEncoding);
1467         }
1468     }
1469 
1470     /**
1471      * Test a single bit and branch if the bit is zero.
1472      *
1473      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1474      * @param uimm6 Unsigned 6-bit bit index.
1475      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1476      */
1477     public void tbz(Register cmp, int uimm6, Label label) {
1478         assert NumUtil.isUnsignedNbit(6, uimm6);
1479         if (label.isBound()) {
1480             int offset = label.position() - position();
1481             super.tbz(cmp, uimm6, offset);
1482         } else {
1483             label.addPatchAt(position());
1484             int indexEncoding = uimm6 << PatchLabelKind.INFORMATION_OFFSET;
1485             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 6);
1486             emitInt(PatchLabelKind.BRANCH_BIT_ZERO.encoding | indexEncoding | regEncoding);
1487         }
1488     }
1489 
1490     /**
1491      * Branches to label if condition is true.
1492      *
1493      * @param condition any condition value allowed. Non null.
1494      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1495      */
1496     public void branchConditionally(ConditionFlag condition, Label label) {
1497         // TODO Handle case where offset is too large for a single jump instruction
1498         if (label.isBound()) {
1499             int offset = label.position() - position();
1500             super.b(condition, offset);
1501         } else {
1502             label.addPatchAt(position());
1503             // Encode condition flag so that we know how to patch the instruction later
1504             emitInt(PatchLabelKind.BRANCH_CONDITIONALLY.encoding | condition.encoding << PatchLabelKind.INFORMATION_OFFSET);
1505         }
1506     }
1507 
1508     /**
1509      * Branches if condition is true. Address of jump is patched up by HotSpot c++ code.
1510      *
1511      * @param condition any condition value allowed. Non null.
1512      */
1513     public void branchConditionally(ConditionFlag condition) {
1514         // Correct offset is fixed up by HotSpot later.
1515         super.b(condition, 0);
1516     }
1517 
1518     /**
1519      * Jumps to label.
1520      *
1521      * param label Can only handle signed 28-bit offsets. May be unbound. Non null.
1522      */
1523     @Override
1524     public void jmp(Label label) {
1525         // TODO Handle case where offset is too large for a single jump instruction
1526         if (label.isBound()) {
1527             int offset = label.position() - position();
1528             super.b(offset);
1529         } else {
1530             label.addPatchAt(position());
1531             emitInt(PatchLabelKind.BRANCH_UNCONDITIONALLY.encoding);
1532         }
1533     }
1534 
1535     /**
1536      * Jump to address in dest.
1537      *
1538      * @param dest General purpose register. May not be null, zero-register or stackpointer.
1539      */
1540     public void jmp(Register dest) {
1541         super.br(dest);
1542     }
1543 
1544     /**
1545      * Immediate jump instruction fixed up by HotSpot c++ code.
1546      */
1547     public void jmp() {
1548         // Offset has to be fixed up by c++ code.
1549         super.b(0);
1550     }
1551 
1552     /**
1553      *
1554      * @return true if immediate offset can be used in a single branch instruction.
1555      */
1556     public static boolean isBranchImmediateOffset(long imm) {
1557         return NumUtil.isSignedNbit(28, imm);
1558     }
1559 
1560     /* system instructions */
1561 
1562     /**
1563      * Exception codes used when calling hlt instruction.
1564      */
1565     public enum AArch64ExceptionCode {
1566         NO_SWITCH_TARGET(0x0),
1567         BREAKPOINT(0x1);
1568 
1569         public final int encoding;
1570 
1571         AArch64ExceptionCode(int encoding) {
1572             this.encoding = encoding;
1573         }
1574     }
1575 
1576     /**
1577      * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
1578      * UNALLOCATED instruction.
1579      *
1580      * @param exceptionCode exception code specifying why halt was called. Non null.
1581      */
1582     public void hlt(AArch64ExceptionCode exceptionCode) {
1583         super.hlt(exceptionCode.encoding);
1584     }
1585 
1586     /**
1587      * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
1588      * exception level.
1589      *
1590      * @param exceptionCode exception code specifying why break was called. Non null.
1591      */
1592     public void brk(AArch64ExceptionCode exceptionCode) {
1593         super.brk(exceptionCode.encoding);
1594     }
1595 
1596     public void pause() {
1597         throw GraalError.unimplemented();
1598     }
1599 
1600     /**
1601      * Executes no-op instruction. No registers or flags are updated, except for PC.
1602      */
1603     public void nop() {
1604         super.hint(SystemHint.NOP);
1605     }
1606 
1607     /**
1608      * Consumption of Speculative Data Barrier. This is a memory barrier that controls speculative
1609      * execution and data value prediction.
1610      */
1611     public void csdb() {
1612         super.hint(SystemHint.CSDB);
1613     }
1614 
1615     /**
1616      * Same as {@link #nop()}.
1617      */
1618     @Override
1619     public void ensureUniquePC() {
1620         nop();
1621     }
1622 
1623     /**
1624      * Aligns PC.
1625      *
1626      * @param modulus Has to be positive multiple of 4.
1627      */
1628     @Override
1629     public void align(int modulus) {
1630         assert modulus > 0 && (modulus & 0x3) == 0 : "Modulus has to be a positive multiple of 4.";
1631         if (position() % modulus == 0) {
1632             return;
1633         }
1634         int offset = modulus - position() % modulus;
1635         for (int i = 0; i < offset; i += 4) {
1636             nop();
1637         }
1638     }
1639 
1640     /**
1641      * Patches jump targets when label gets bound.
1642      */
1643     @Override
1644     protected void patchJumpTarget(int branch, int jumpTarget) {
1645         int instruction = getInt(branch);
1646         int branchOffset = jumpTarget - branch;
1647         PatchLabelKind type = PatchLabelKind.fromEncoding(instruction);
1648         switch (type) {
1649             case BRANCH_CONDITIONALLY:
1650                 ConditionFlag cf = ConditionFlag.fromEncoding(instruction >>> PatchLabelKind.INFORMATION_OFFSET);
1651                 super.b(cf, branchOffset, branch);
1652                 break;
1653             case BRANCH_UNCONDITIONALLY:
1654                 super.b(branchOffset, branch);
1655                 break;
1656             case JUMP_ADDRESS:
1657                 int offset = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1658                 emitInt(jumpTarget - offset, branch);
1659                 break;
1660             case BRANCH_NONZERO:
1661             case BRANCH_ZERO: {
1662                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1663                 int sizeEncoding = information & 1;
1664                 int regEncoding = information >>> 1;
1665                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1666                 // 1 => 64; 0 => 32
1667                 int size = sizeEncoding * 32 + 32;
1668                 switch (type) {
1669                     case BRANCH_NONZERO:
1670                         super.cbnz(size, reg, branchOffset, branch);
1671                         break;
1672                     case BRANCH_ZERO:
1673                         super.cbz(size, reg, branchOffset, branch);
1674                         break;
1675                 }
1676                 break;
1677             }
1678             case BRANCH_BIT_NONZERO:
1679             case BRANCH_BIT_ZERO: {
1680                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1681                 int sizeEncoding = information & NumUtil.getNbitNumberInt(6);
1682                 int regEncoding = information >>> 6;
1683                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1684                 switch (type) {
1685                     case BRANCH_BIT_NONZERO:
1686                         super.tbnz(reg, sizeEncoding, branchOffset, branch);
1687                         break;
1688                     case BRANCH_BIT_ZERO:
1689                         super.tbz(reg, sizeEncoding, branchOffset, branch);
1690                         break;
1691                 }
1692                 break;
1693             }
1694             case ADR: {
1695                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1696                 int regEncoding = information;
1697                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1698                 super.adr(reg, branchOffset, branch);
1699                 break;
1700             }
1701             default:
1702                 throw GraalError.shouldNotReachHere();
1703         }
1704     }
1705 
1706     /**
1707      * Generates an address of the form {@code base + displacement}.
1708      *
1709      * Does not change base register to fulfill this requirement. Will fail if displacement cannot
1710      * be represented directly as address.
1711      *
1712      * @param base general purpose register. May not be null or the zero register.
1713      * @param displacement arbitrary displacement added to base.
1714      * @return AArch64Address referencing memory at {@code base + displacement}.
1715      */
1716     @Override
1717     public AArch64Address makeAddress(Register base, int displacement) {
1718         return makeAddress(base, displacement, zr, /* signExtend */false, /* transferSize */0, zr, /* allowOverwrite */false);
1719     }
1720 
1721     @Override
1722     public AArch64Address getPlaceholder(int instructionStartPosition) {
1723         return AArch64Address.PLACEHOLDER;
1724     }
1725 
1726     public void addressOf(Register dst) {
1727         // This will be fixed up later.
1728         super.adrp(dst);
1729         super.add(64, dst, dst, 0);
1730     }
1731 
1732     /**
1733      * Loads an address into Register d.
1734      *
1735      * @param d general purpose register. May not be null.
1736      * @param a AArch64Address the address of an operand.
1737      */
1738     public void lea(Register d, AArch64Address a) {
1739         a.lea(this, d);
1740     }
1741 }