1 /*
   2  * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 
  26 package org.graalvm.compiler.asm.aarch64;
  27 
  28 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.BASE_REGISTER_ONLY;
  29 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.EXTENDED_REGISTER_OFFSET;
  30 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SCALED;
  31 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_UNSCALED;
  32 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.REGISTER_OFFSET;
  33 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_BASE;
  34 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_INDEX;
  35 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.NO_WORK;
  36 
  37 import org.graalvm.compiler.asm.BranchTargetOutOfBoundsException;
  38 
  39 import static jdk.vm.ci.aarch64.AArch64.CPU;
  40 import static jdk.vm.ci.aarch64.AArch64.r8;
  41 import static jdk.vm.ci.aarch64.AArch64.r9;
  42 import static jdk.vm.ci.aarch64.AArch64.sp;
  43 import static jdk.vm.ci.aarch64.AArch64.zr;
  44 
  45 import org.graalvm.compiler.asm.Label;
  46 import org.graalvm.compiler.core.common.NumUtil;
  47 import org.graalvm.compiler.debug.GraalError;
  48 
  49 import jdk.vm.ci.aarch64.AArch64;
  50 import jdk.vm.ci.code.Register;
  51 import jdk.vm.ci.code.TargetDescription;
  52 
  53 public class AArch64MacroAssembler extends AArch64Assembler {
  54 
  55     private final ScratchRegister[] scratchRegister = new ScratchRegister[]{new ScratchRegister(r8), new ScratchRegister(r9)};
  56 
  57     // Points to the next free scratch register
  58     private int nextFreeScratchRegister = 0;
  59 
  60     public AArch64MacroAssembler(TargetDescription target) {
  61         super(target);
  62     }
  63 
  64     public class ScratchRegister implements AutoCloseable {
  65         private final Register register;
  66 
  67         public ScratchRegister(Register register) {
  68             this.register = register;
  69         }
  70 
  71         public Register getRegister() {
  72             return register;
  73         }
  74 
  75         @Override
  76         public void close() {
  77             assert nextFreeScratchRegister > 0 : "Close called too often";
  78             nextFreeScratchRegister--;
  79         }
  80     }
  81 
  82     public ScratchRegister getScratchRegister() {
  83         return scratchRegister[nextFreeScratchRegister++];
  84     }
  85 
  86     /**
  87      * Specifies what actions have to be taken to turn an arbitrary address of the form
  88      * {@code base + displacement [+ index [<< scale]]} into a valid AArch64Address.
  89      */
  90     public static class AddressGenerationPlan {
  91         public final WorkPlan workPlan;
  92         public final AArch64Address.AddressingMode addressingMode;
  93         public final boolean needsScratch;
  94 
  95         public enum WorkPlan {
  96             /**
  97              * Can be used as-is without extra work.
  98              */
  99             NO_WORK,
 100             /**
 101              * Add scaled displacement to index register.
 102              */
 103             ADD_TO_INDEX,
 104             /**
 105              * Add unscaled displacement to base register.
 106              */
 107             ADD_TO_BASE,
 108         }
 109 
 110         /**
 111          * @param workPlan Work necessary to generate a valid address.
 112          * @param addressingMode Addressing mode of generated address.
 113          * @param needsScratch True if generating address needs a scatch register, false otherwise.
 114          */
 115         public AddressGenerationPlan(WorkPlan workPlan, AArch64Address.AddressingMode addressingMode, boolean needsScratch) {
 116             this.workPlan = workPlan;
 117             this.addressingMode = addressingMode;
 118             this.needsScratch = needsScratch;
 119         }
 120     }
 121 
 122     /**
 123      * Generates an addressplan for an address of the form
 124      * {@code base + displacement [+ index [<< log2(transferSize)]]} with the index register and
 125      * scaling being optional.
 126      *
 127      * @param displacement an arbitrary displacement.
 128      * @param hasIndexRegister true if the address uses an index register, false otherwise. non null
 129      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 130      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 131      * @return AddressGenerationPlan that specifies the actions necessary to generate a valid
 132      *         AArch64Address for the given parameters.
 133      */
 134     public static AddressGenerationPlan generateAddressPlan(long displacement, boolean hasIndexRegister, int transferSize) {
 135         assert transferSize == 0 || transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 136         boolean indexScaled = transferSize != 0;
 137         int log2Scale = NumUtil.log2Ceil(transferSize);
 138         long scaledDisplacement = displacement >> log2Scale;
 139         boolean displacementScalable = indexScaled && (displacement & (transferSize - 1)) == 0;
 140         if (displacement == 0) {
 141             // register offset without any work beforehand.
 142             return new AddressGenerationPlan(NO_WORK, REGISTER_OFFSET, false);
 143         } else {
 144             if (hasIndexRegister) {
 145                 if (displacementScalable) {
 146                     boolean needsScratch = !isArithmeticImmediate(scaledDisplacement);
 147                     return new AddressGenerationPlan(ADD_TO_INDEX, REGISTER_OFFSET, needsScratch);
 148                 } else {
 149                     boolean needsScratch = !isArithmeticImmediate(displacement);
 150                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 151                 }
 152             } else {
 153                 if (displacementScalable && NumUtil.isUnsignedNbit(12, scaledDisplacement)) {
 154                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_SCALED, false);
 155                 } else if (NumUtil.isSignedNbit(9, displacement)) {
 156                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_UNSCALED, false);
 157                 } else {
 158                     boolean needsScratch = !isArithmeticImmediate(displacement);
 159                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 160                 }
 161             }
 162         }
 163     }
 164 
 165     /**
 166      * Returns an AArch64Address pointing to
 167      * {@code base + displacement + index << log2(transferSize)}.
 168      *
 169      * @param base general purpose register. May not be null or the zero register.
 170      * @param displacement arbitrary displacement added to base.
 171      * @param index general purpose register. May not be null or the stack pointer.
 172      * @param signExtendIndex if true consider index register a word register that should be
 173      *            sign-extended before being added.
 174      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 175      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 176      * @param additionalReg additional register used either as a scratch register or as part of the
 177      *            final address, depending on whether allowOverwrite is true or not. May not be null
 178      *            or stackpointer.
 179      * @param allowOverwrite if true allows to change value of base or index register to generate
 180      *            address.
 181      * @return AArch64Address pointing to memory at
 182      *         {@code base + displacement + index << log2(transferSize)}.
 183      */
 184     public AArch64Address makeAddress(Register base, long displacement, Register index, boolean signExtendIndex, int transferSize, Register additionalReg, boolean allowOverwrite) {
 185         AddressGenerationPlan plan = generateAddressPlan(displacement, !index.equals(zr), transferSize);
 186         assert allowOverwrite || !zr.equals(additionalReg) || plan.workPlan == NO_WORK;
 187         assert !plan.needsScratch || !zr.equals(additionalReg);
 188         int log2Scale = NumUtil.log2Ceil(transferSize);
 189         long scaledDisplacement = displacement >> log2Scale;
 190         Register newIndex = index;
 191         Register newBase = base;
 192         int immediate;
 193         switch (plan.workPlan) {
 194             case NO_WORK:
 195                 if (plan.addressingMode == IMMEDIATE_SCALED) {
 196                     immediate = (int) scaledDisplacement;
 197                 } else {
 198                     immediate = (int) displacement;
 199                 }
 200                 break;
 201             case ADD_TO_INDEX:
 202                 newIndex = allowOverwrite ? index : additionalReg;
 203                 assert !newIndex.equals(sp) && !newIndex.equals(zr);
 204                 if (plan.needsScratch) {
 205                     mov(additionalReg, scaledDisplacement);
 206                     add(signExtendIndex ? 32 : 64, newIndex, index, additionalReg);
 207                 } else {
 208                     add(signExtendIndex ? 32 : 64, newIndex, index, (int) scaledDisplacement);
 209                 }
 210                 immediate = 0;
 211                 break;
 212             case ADD_TO_BASE:
 213                 newBase = allowOverwrite ? base : additionalReg;
 214                 assert !newBase.equals(sp) && !newBase.equals(zr);
 215                 if (plan.needsScratch) {
 216                     mov(additionalReg, displacement);
 217                     add(64, newBase, base, additionalReg);
 218                 } else {
 219                     add(64, newBase, base, (int) displacement);
 220                 }
 221                 immediate = 0;
 222                 break;
 223             default:
 224                 throw GraalError.shouldNotReachHere();
 225         }
 226         AArch64Address.AddressingMode addressingMode = plan.addressingMode;
 227         ExtendType extendType = null;
 228         if (addressingMode == REGISTER_OFFSET) {
 229             if (newIndex.equals(zr)) {
 230                 addressingMode = BASE_REGISTER_ONLY;
 231             } else if (signExtendIndex) {
 232                 addressingMode = EXTENDED_REGISTER_OFFSET;
 233                 extendType = ExtendType.SXTW;
 234             }
 235         }
 236         return AArch64Address.createAddress(addressingMode, newBase, newIndex, immediate, transferSize != 0, extendType);
 237     }
 238 
 239     /**
 240      * Returns an AArch64Address pointing to {@code base + displacement}. Specifies the memory
 241      * transfer size to allow some optimizations when building the address.
 242      *
 243      * @param base general purpose register. May not be null or the zero register.
 244      * @param displacement arbitrary displacement added to base.
 245      * @param transferSize the memory transfer size in bytes.
 246      * @param additionalReg additional register used either as a scratch register or as part of the
 247      *            final address, depending on whether allowOverwrite is true or not. May not be
 248      *            null, zero register or stackpointer.
 249      * @param allowOverwrite if true allows to change value of base or index register to generate
 250      *            address.
 251      * @return AArch64Address pointing to memory at {@code base + displacement}.
 252      */
 253     public AArch64Address makeAddress(Register base, long displacement, Register additionalReg, int transferSize, boolean allowOverwrite) {
 254         assert additionalReg.getRegisterCategory().equals(CPU);
 255         return makeAddress(base, displacement, zr, /* sign-extend */false, transferSize, additionalReg, allowOverwrite);
 256     }
 257 
 258     /**
 259      * Returns an AArch64Address pointing to {@code base + displacement}. Fails if address cannot be
 260      * represented without overwriting base register or using a scratch register.
 261      *
 262      * @param base general purpose register. May not be null or the zero register.
 263      * @param displacement arbitrary displacement added to base.
 264      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 265      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 266      * @return AArch64Address pointing to memory at {@code base + displacement}.
 267      */
 268     public AArch64Address makeAddress(Register base, long displacement, int transferSize) {
 269         return makeAddress(base, displacement, zr, /* signExtend */false, transferSize, zr, /* allowOverwrite */false);
 270     }
 271 
 272     /**
 273      * Loads memory address into register.
 274      *
 275      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 276      * @param address address whose value is loaded into dst. May not be null,
 277      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_POST_INDEXED
 278      *            POST_INDEXED} or
 279      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_PRE_INDEXED
 280      *            IMMEDIATE_PRE_INDEXED}
 281      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 282      *            the index register is scaled. Can be 1, 2, 4 or 8.
 283      */
 284     public void loadAddress(Register dst, AArch64Address address, int transferSize) {
 285         assert transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 286         assert dst.getRegisterCategory().equals(CPU);
 287         int shiftAmt = NumUtil.log2Ceil(transferSize);
 288         switch (address.getAddressingMode()) {
 289             case IMMEDIATE_SCALED:
 290                 int scaledImmediate = address.getImmediateRaw() << shiftAmt;
 291                 int lowerBits = scaledImmediate & NumUtil.getNbitNumberInt(12);
 292                 int higherBits = scaledImmediate & ~NumUtil.getNbitNumberInt(12);
 293                 boolean firstAdd = true;
 294                 if (lowerBits != 0) {
 295                     add(64, dst, address.getBase(), lowerBits);
 296                     firstAdd = false;
 297                 }
 298                 if (higherBits != 0) {
 299                     Register src = firstAdd ? address.getBase() : dst;
 300                     add(64, dst, src, higherBits);
 301                 }
 302                 break;
 303             case IMMEDIATE_UNSCALED:
 304                 int immediate = address.getImmediateRaw();
 305                 add(64, dst, address.getBase(), immediate);
 306                 break;
 307             case REGISTER_OFFSET:
 308                 add(64, dst, address.getBase(), address.getOffset(), ShiftType.LSL, address.isScaled() ? shiftAmt : 0);
 309                 break;
 310             case EXTENDED_REGISTER_OFFSET:
 311                 add(64, dst, address.getBase(), address.getOffset(), address.getExtendType(), address.isScaled() ? shiftAmt : 0);
 312                 break;
 313             case PC_LITERAL: {
 314                 addressOf(dst);
 315                 break;
 316             }
 317             case BASE_REGISTER_ONLY:
 318                 movx(dst, address.getBase());
 319                 break;
 320             default:
 321                 throw GraalError.shouldNotReachHere();
 322         }
 323     }
 324 
 325     public void movx(Register dst, Register src) {
 326         mov(64, dst, src);
 327     }
 328 
 329     public void mov(int size, Register dst, Register src) {
 330         if (dst.equals(sp) || src.equals(sp)) {
 331             add(size, dst, src, 0);
 332         } else {
 333             or(size, dst, zr, src);
 334         }
 335     }
 336 
 337     /**
 338      * Generates a 64-bit immediate move code sequence.
 339      *
 340      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 341      * @param imm
 342      */
 343     private void mov64(Register dst, long imm) {
 344         // We have to move all non zero parts of the immediate in 16-bit chunks
 345         boolean firstMove = true;
 346         for (int offset = 0; offset < 64; offset += 16) {
 347             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 348             if (chunk == 0) {
 349                 continue;
 350             }
 351             if (firstMove) {
 352                 movz(64, dst, chunk, offset);
 353                 firstMove = false;
 354             } else {
 355                 movk(64, dst, chunk, offset);
 356             }
 357         }
 358         assert !firstMove;
 359     }
 360 
 361     /**
 362      * Loads immediate into register.
 363      *
 364      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 365      * @param imm immediate loaded into register.
 366      */
 367     public void mov(Register dst, long imm) {
 368         assert dst.getRegisterCategory().equals(CPU);
 369         if (imm == 0L) {
 370             movx(dst, zr);
 371         } else if (LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 372             or(64, dst, zr, imm);
 373         } else if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
 374             // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can
 375             // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and
 376             // then sign extend
 377             // them. This allows us to cover immediates like ~1L with 2 instructions.
 378             mov(dst, (int) imm);
 379             sxt(64, 32, dst, dst);
 380         } else {
 381             mov64(dst, imm);
 382         }
 383     }
 384 
 385     /**
 386      * Loads immediate into register.
 387      *
 388      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 389      * @param imm immediate loaded into register.
 390      */
 391     public void mov(Register dst, int imm) {
 392         mov(dst, imm & 0xFFFF_FFFFL);
 393     }
 394 
 395     /**
 396      * Generates a 48-bit immediate move code sequence. The immediate may later be updated by
 397      * HotSpot.
 398      *
 399      * In AArch64 mode the virtual address space is 48-bits in size, so we only need three
 400      * instructions to create a patchable instruction sequence that can reach anywhere.
 401      *
 402      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 403      * @param imm
 404      */
 405     public void movNativeAddress(Register dst, long imm) {
 406         assert (imm & 0xFFFF_0000_0000_0000L) == 0;
 407         // We have to move all non zero parts of the immediate in 16-bit chunks
 408         boolean firstMove = true;
 409         for (int offset = 0; offset < 48; offset += 16) {
 410             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 411             if (firstMove) {
 412                 movz(64, dst, chunk, offset);
 413                 firstMove = false;
 414             } else {
 415                 movk(64, dst, chunk, offset);
 416             }
 417         }
 418         assert !firstMove;
 419     }
 420 
 421     /**
 422      * Generates a 32-bit immediate move code sequence. The immediate may later be updated by
 423      * HotSpot.
 424      *
 425      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 426      * @param imm
 427      */
 428     public void movNarrowAddress(Register dst, long imm) {
 429         assert (imm & 0xFFFF_FFFF_0000_0000L) == 0;
 430         movz(64, dst, (int) (imm >>> 16), 16);
 431         movk(64, dst, (int) (imm & 0xffff), 0);
 432     }
 433 
 434     /**
 435      * @return Number of instructions necessary to load immediate into register.
 436      */
 437     public static int nrInstructionsToMoveImmediate(long imm) {
 438         if (imm == 0L || LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 439             return 1;
 440         }
 441         if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
 442             // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can
 443             // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and
 444             // then sign extend
 445             // them. This allows us to cover immediates like ~1L with 2 instructions.
 446             return 2;
 447         }
 448         int nrInstructions = 0;
 449         for (int offset = 0; offset < 64; offset += 16) {
 450             int part = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 451             if (part != 0) {
 452                 nrInstructions++;
 453             }
 454         }
 455         return nrInstructions;
 456     }
 457 
 458     /**
 459      * Loads a srcSize value from address into rt sign-extending it if necessary.
 460      *
 461      * @param targetSize size of target register in bits. Must be 32 or 64.
 462      * @param srcSize size of memory read in bits. Must be 8, 16 or 32 and smaller or equal to
 463      *            targetSize.
 464      * @param rt general purpose register. May not be null or stackpointer.
 465      * @param address all addressing modes allowed. May not be null.
 466      */
 467     @Override
 468     public void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
 469         assert targetSize == 32 || targetSize == 64;
 470         assert srcSize <= targetSize;
 471         if (targetSize == srcSize) {
 472             super.ldr(srcSize, rt, address);
 473         } else {
 474             super.ldrs(targetSize, srcSize, rt, address);
 475         }
 476     }
 477 
 478     /**
 479      * Loads a srcSize value from address into rt zero-extending it if necessary.
 480      *
 481      * @param srcSize size of memory read in bits. Must be 8, 16 or 32 and smaller or equal to
 482      *            targetSize.
 483      * @param rt general purpose register. May not be null or stackpointer.
 484      * @param address all addressing modes allowed. May not be null.
 485      */
 486     @Override
 487     public void ldr(int srcSize, Register rt, AArch64Address address) {
 488         super.ldr(srcSize, rt, address);
 489     }
 490 
 491     /**
 492      * Conditional move. dst = src1 if condition else src2.
 493      *
 494      * @param size register size. Has to be 32 or 64.
 495      * @param result general purpose register. May not be null or the stackpointer.
 496      * @param trueValue general purpose register. May not be null or the stackpointer.
 497      * @param falseValue general purpose register. May not be null or the stackpointer.
 498      * @param cond any condition flag. May not be null.
 499      */
 500     public void cmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag cond) {
 501         super.csel(size, result, trueValue, falseValue, cond);
 502     }
 503 
 504     /**
 505      * Conditional set. dst = 1 if condition else 0.
 506      *
 507      * @param dst general purpose register. May not be null or stackpointer.
 508      * @param condition any condition. May not be null.
 509      */
 510     public void cset(int size, Register dst, ConditionFlag condition) {
 511         super.csinc(size, dst, zr, zr, condition.negate());
 512     }
 513 
 514     /**
 515      * dst = src1 + src2.
 516      *
 517      * @param size register size. Has to be 32 or 64.
 518      * @param dst general purpose register. May not be null.
 519      * @param src1 general purpose register. May not be null.
 520      * @param src2 general purpose register. May not be null or stackpointer.
 521      */
 522     public void add(int size, Register dst, Register src1, Register src2) {
 523         if (dst.equals(sp) || src1.equals(sp)) {
 524             super.add(size, dst, src1, src2, ExtendType.UXTX, 0);
 525         } else {
 526             super.add(size, dst, src1, src2, ShiftType.LSL, 0);
 527         }
 528     }
 529 
 530     /**
 531      * dst = src1 + src2 and sets condition flags.
 532      *
 533      * @param size register size. Has to be 32 or 64.
 534      * @param dst general purpose register. May not be null.
 535      * @param src1 general purpose register. May not be null.
 536      * @param src2 general purpose register. May not be null or stackpointer.
 537      */
 538     public void adds(int size, Register dst, Register src1, Register src2) {
 539         if (dst.equals(sp) || src1.equals(sp)) {
 540             super.adds(size, dst, src1, src2, ExtendType.UXTX, 0);
 541         } else {
 542             super.adds(size, dst, src1, src2, ShiftType.LSL, 0);
 543         }
 544     }
 545 
 546     /**
 547      * dst = src1 - src2 and sets condition flags.
 548      *
 549      * @param size register size. Has to be 32 or 64.
 550      * @param dst general purpose register. May not be null.
 551      * @param src1 general purpose register. May not be null.
 552      * @param src2 general purpose register. May not be null or stackpointer.
 553      */
 554     public void subs(int size, Register dst, Register src1, Register src2) {
 555         if (dst.equals(sp) || src1.equals(sp)) {
 556             super.subs(size, dst, src1, src2, ExtendType.UXTX, 0);
 557         } else {
 558             super.subs(size, dst, src1, src2, ShiftType.LSL, 0);
 559         }
 560     }
 561 
 562     /**
 563      * dst = src1 - src2.
 564      *
 565      * @param size register size. Has to be 32 or 64.
 566      * @param dst general purpose register. May not be null.
 567      * @param src1 general purpose register. May not be null.
 568      * @param src2 general purpose register. May not be null or stackpointer.
 569      */
 570     public void sub(int size, Register dst, Register src1, Register src2) {
 571         if (dst.equals(sp) || src1.equals(sp)) {
 572             super.sub(size, dst, src1, src2, ExtendType.UXTX, 0);
 573         } else {
 574             super.sub(size, dst, src1, src2, ShiftType.LSL, 0);
 575         }
 576     }
 577 
 578     /**
 579      * dst = src1 + shiftType(src2, shiftAmt & (size - 1)).
 580      *
 581      * @param size register size. Has to be 32 or 64.
 582      * @param dst general purpose register. May not be null or stackpointer.
 583      * @param src1 general purpose register. May not be null or stackpointer.
 584      * @param src2 general purpose register. May not be null or stackpointer.
 585      * @param shiftType any type but ROR.
 586      * @param shiftAmt arbitrary shift amount.
 587      */
 588     @Override
 589     public void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 590         int shift = clampShiftAmt(size, shiftAmt);
 591         super.add(size, dst, src1, src2, shiftType, shift);
 592     }
 593 
 594     /**
 595      * dst = src1 + shiftType(src2, shiftAmt & (size-1)) and sets condition flags.
 596      *
 597      * @param size register size. Has to be 32 or 64.
 598      * @param dst general purpose register. May not be null or stackpointer.
 599      * @param src1 general purpose register. May not be null or stackpointer.
 600      * @param src2 general purpose register. May not be null or stackpointer.
 601      * @param shiftType any type but ROR.
 602      * @param shiftAmt arbitrary shift amount.
 603      */
 604     @Override
 605     public void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 606         int shift = clampShiftAmt(size, shiftAmt);
 607         super.sub(size, dst, src1, src2, shiftType, shift);
 608     }
 609 
 610     /**
 611      * dst = -src1.
 612      *
 613      * @param size register size. Has to be 32 or 64.
 614      * @param dst general purpose register. May not be null or stackpointer.
 615      * @param src general purpose register. May not be null or stackpointer.
 616      */
 617     public void neg(int size, Register dst, Register src) {
 618         sub(size, dst, zr, src);
 619     }
 620 
 621     /**
 622      * dst = src + immediate.
 623      *
 624      * @param size register size. Has to be 32 or 64.
 625      * @param dst general purpose register. May not be null or zero-register.
 626      * @param src general purpose register. May not be null or zero-register.
 627      * @param immediate 32-bit signed int
 628      */
 629     @Override
 630     public void add(int size, Register dst, Register src, int immediate) {
 631         assert (!dst.equals(zr) && !src.equals(zr));
 632         if (immediate < 0) {
 633             sub(size, dst, src, -immediate);
 634         } else if (isAimm(immediate)) {
 635             if (!(dst.equals(src) && immediate == 0)) {
 636                 super.add(size, dst, src, immediate);
 637             }
 638         } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) {
 639             super.add(size, dst, src, immediate & -(1 << 12));
 640             super.add(size, dst, dst, immediate & ((1 << 12) - 1));
 641         } else {
 642             assert !dst.equals(src);
 643             mov(dst, immediate);
 644             add(size, src, dst, dst);
 645         }
 646     }
 647 
 648     /**
 649      * dst = src + immediate.
 650      *
 651      * @param size register size. Has to be 32 or 64.
 652      * @param dst general purpose register. May not be null or zero-register.
 653      * @param src general purpose register. May not be null or zero-register.
 654      * @param immediate 64-bit signed int
 655      */
 656     public void add(int size, Register dst, Register src, long immediate) {
 657         if (NumUtil.isInt(immediate)) {
 658             add(size, dst, src, (int) immediate);
 659         } else {
 660             assert (!dst.equals(zr) && !src.equals(zr));
 661             assert !dst.equals(src);
 662             assert size == 64;
 663             mov(dst, immediate);
 664             add(size, src, dst, dst);
 665         }
 666     }
 667 
 668     /**
 669      * dst = src + aimm and sets condition flags.
 670      *
 671      * @param size register size. Has to be 32 or 64.
 672      * @param dst general purpose register. May not be null or stackpointer.
 673      * @param src general purpose register. May not be null or zero-register.
 674      * @param immediate arithmetic immediate.
 675      */
 676     @Override
 677     public void adds(int size, Register dst, Register src, int immediate) {
 678         assert (!dst.equals(sp) && !src.equals(zr));
 679         if (immediate < 0) {
 680             subs(size, dst, src, -immediate);
 681         } else if (!(dst.equals(src) && immediate == 0)) {
 682             super.adds(size, dst, src, immediate);
 683         }
 684     }
 685 
 686     /**
 687      * dst = src - immediate.
 688      *
 689      * @param size register size. Has to be 32 or 64.
 690      * @param dst general purpose register. May not be null or zero-register.
 691      * @param src general purpose register. May not be null or zero-register.
 692      * @param immediate 32-bit signed int
 693      */
 694     @Override
 695     public void sub(int size, Register dst, Register src, int immediate) {
 696         assert (!dst.equals(zr) && !src.equals(zr));
 697         if (immediate < 0) {
 698             add(size, dst, src, -immediate);
 699         } else if (isAimm(immediate)) {
 700             if (!(dst.equals(src) && immediate == 0)) {
 701                 super.sub(size, dst, src, immediate);
 702             }
 703         } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) {
 704             super.sub(size, dst, src, immediate & -(1 << 12));
 705             super.sub(size, dst, dst, immediate & ((1 << 12) - 1));
 706         } else {
 707             assert !dst.equals(src);
 708             mov(dst, immediate);
 709             sub(size, src, dst, dst);
 710         }
 711     }
 712 
 713     /**
 714      * dst = src - aimm and sets condition flags.
 715      *
 716      * @param size register size. Has to be 32 or 64.
 717      * @param dst general purpose register. May not be null or stackpointer.
 718      * @param src general purpose register. May not be null or zero-register.
 719      * @param immediate arithmetic immediate.
 720      */
 721     @Override
 722     public void subs(int size, Register dst, Register src, int immediate) {
 723         assert (!dst.equals(sp) && !src.equals(zr));
 724         if (immediate < 0) {
 725             adds(size, dst, src, -immediate);
 726         } else if (!dst.equals(src) || immediate != 0) {
 727             super.subs(size, dst, src, immediate);
 728         }
 729     }
 730 
    /**
     * dst = src1 * src2.
     * <p>
     * Emitted as a multiply-add whose addend operand is the zero register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or the stackpointer.
     * @param src1 general purpose register. May not be null or the stackpointer.
     * @param src2 general purpose register. May not be null or the stackpointer.
     */
    public void mul(int size, Register dst, Register src1, Register src2) {
        super.madd(size, dst, src1, src2, zr);
    }
 742 
    /**
     * dst = src3 + src1 * src2.
     * <p>
     * This override only forwards its arguments unchanged to the assembler implementation.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or the stackpointer.
     * @param src1 general purpose register. May not be null or the stackpointer.
     * @param src2 general purpose register. May not be null or the stackpointer.
     * @param src3 general purpose register. May not be null or the stackpointer.
     */
    @Override
    public void madd(int size, Register dst, Register src1, Register src2, Register src3) {
        super.madd(size, dst, src1, src2, src3);
    }
 756 
    /**
     * dst = src3 - src1 * src2.
     * <p>
     * This override only forwards its arguments unchanged to the assembler implementation.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or the stackpointer.
     * @param src1 general purpose register. May not be null or the stackpointer.
     * @param src2 general purpose register. May not be null or the stackpointer.
     * @param src3 general purpose register. May not be null or the stackpointer.
     */
    @Override
    public void msub(int size, Register dst, Register src1, Register src2, Register src3) {
        super.msub(size, dst, src1, src2, src3);
    }
 770 
    /**
     * dst = 0 - src1 * src2.
     * <p>
     * Emitted as a multiply-subtract whose minuend operand is the zero register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or the stackpointer.
     * @param src1 general purpose register. May not be null or the stackpointer.
     * @param src2 general purpose register. May not be null or the stackpointer.
     */
    public void mneg(int size, Register dst, Register src1, Register src2) {
        super.msub(size, dst, src1, src2, zr);
    }
 782 
 783     /**
 784      * unsigned multiply high. dst = (src1 * src2) >> size
 785      *
 786      * @param size register size. Has to be 32 or 64.
 787      * @param dst general purpose register. May not be null or the stackpointer.
 788      * @param src1 general purpose register. May not be null or the stackpointer.
 789      * @param src2 general purpose register. May not be null or the stackpointer.
 790      */
 791     public void umulh(int size, Register dst, Register src1, Register src2) {
 792         assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp));
 793         assert size == 32 || size == 64;
 794         if (size == 64) {
 795             super.umulh(dst, src1, src2);
 796         } else {
 797             // xDst = wSrc1 * wSrc2
 798             super.umaddl(dst, src1, src2, zr);
 799             // xDst = xDst >> 32
 800             lshr(64, dst, dst, 32);
 801         }
 802     }
 803 
 804     /**
 805      * signed multiply high. dst = (src1 * src2) >> size
 806      *
 807      * @param size register size. Has to be 32 or 64.
 808      * @param dst general purpose register. May not be null or the stackpointer.
 809      * @param src1 general purpose register. May not be null or the stackpointer.
 810      * @param src2 general purpose register. May not be null or the stackpointer.
 811      */
 812     public void smulh(int size, Register dst, Register src1, Register src2) {
 813         assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp));
 814         assert size == 32 || size == 64;
 815         if (size == 64) {
 816             super.smulh(dst, src1, src2);
 817         } else {
 818             // xDst = wSrc1 * wSrc2
 819             super.smaddl(dst, src1, src2, zr);
 820             // xDst = xDst >> 32
 821             lshr(64, dst, dst, 32);
 822         }
 823     }
 824 
 825     /**
 826      * dst = src1 % src2. Signed.
 827      *
 828      * @param size register size. Has to be 32 or 64.
 829      * @param dst general purpose register. May not be null or the stackpointer.
 830      * @param n numerator. General purpose register. May not be null or the stackpointer.
 831      * @param d denominator. General purpose register. Divisor May not be null or the stackpointer.
 832      */
 833     public void rem(int size, Register dst, Register n, Register d) {
 834         assert (!dst.equals(sp) && !n.equals(sp) && !d.equals(sp));
 835         // There is no irem or similar instruction. Instead we use the relation:
 836         // n % d = n - Floor(n / d) * d if nd >= 0
 837         // n % d = n - Ceil(n / d) * d else
 838         // Which is equivalent to n - TruncatingDivision(n, d) * d
 839         super.sdiv(size, dst, n, d);
 840         super.msub(size, dst, dst, d, n);
 841     }
 842 
 843     /**
 844      * dst = src1 % src2. Unsigned.
 845      *
 846      * @param size register size. Has to be 32 or 64.
 847      * @param dst general purpose register. May not be null or the stackpointer.
 848      * @param n numerator. General purpose register. May not be null or the stackpointer.
 849      * @param d denominator. General purpose register. Divisor May not be null or the stackpointer.
 850      */
 851     public void urem(int size, Register dst, Register n, Register d) {
 852         // There is no irem or similar instruction. Instead we use the relation:
 853         // n % d = n - Floor(n / d) * d
 854         // Which is equivalent to n - TruncatingDivision(n, d) * d
 855         super.udiv(size, dst, n, d);
 856         super.msub(size, dst, dst, d, n);
 857     }
 858 
 859     /**
 860      * Add/subtract instruction encoding supports 12-bit immediate values.
 861      *
 862      * @param imm immediate value to be tested.
 863      * @return true if immediate can be used directly for arithmetic instructions (add/sub), false
 864      *         otherwise.
 865      */
 866     public static boolean isArithmeticImmediate(long imm) {
 867         // If we have a negative immediate we just use the opposite operator. I.e.: x - (-5) == x +
 868         // 5.
 869         return NumUtil.isInt(Math.abs(imm)) && isAimm((int) Math.abs(imm));
 870     }
 871 
    /**
     * Compare instructions are add/subtract instructions and so support 12-bit immediate values.
     * The check is therefore the same as {@link #isArithmeticImmediate(long)}.
     *
     * @param imm immediate value to be tested.
     * @return true if immediate can be used directly with comparison instructions, false otherwise.
     */
    public static boolean isComparisonImmediate(long imm) {
        return isArithmeticImmediate(imm);
    }
 881 
 882     /**
 883      * Move wide immediate instruction encoding supports 16-bit immediate values which can be
 884      * optionally-shifted by multiples of 16 (i.e. 0, 16, 32, 48).
 885      *
 886      * @return true if immediate can be moved directly into a register, false otherwise.
 887      */
 888     public static boolean isMovableImmediate(long imm) {
 889         // // Positions of first, respectively last set bit.
 890         // int start = Long.numberOfTrailingZeros(imm);
 891         // int end = 64 - Long.numberOfLeadingZeros(imm);
 892         // int length = end - start;
 893         // if (length > 16) {
 894         // return false;
 895         // }
 896         // // We can shift the necessary part of the immediate (i.e. everything between the first
 897         // and
 898         // // last set bit) by as much as 16 - length around to arrive at a valid shift amount
 899         // int tolerance = 16 - length;
 900         // int prevMultiple = NumUtil.roundDown(start, 16);
 901         // int nextMultiple = NumUtil.roundUp(start, 16);
 902         // return start - prevMultiple <= tolerance || nextMultiple - start <= tolerance;
 903         /*
 904          * This is a bit optimistic because the constant could also be for an arithmetic instruction
 905          * which only supports 12-bits. That case needs to be handled in the backend.
 906          */
 907         return NumUtil.isInt(Math.abs(imm)) && NumUtil.isUnsignedNbit(16, (int) Math.abs(imm));
 908     }
 909 
 910     /**
 911      * dst = src << (shiftAmt & (size - 1)).
 912      *
 913      * @param size register size. Has to be 32 or 64.
 914      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 915      * @param src general purpose register. May not be null, stackpointer or zero-register.
 916      * @param shiftAmt amount by which src is shifted.
 917      */
 918     public void shl(int size, Register dst, Register src, long shiftAmt) {
 919         int shift = clampShiftAmt(size, shiftAmt);
 920         super.ubfm(size, dst, src, (size - shift) & (size - 1), size - 1 - shift);
 921     }
 922 
    /**
     * dst = src << (shift & (size - 1)), with the shift amount taken from a register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src general purpose register. May not be null or stackpointer.
     * @param shift general purpose register. May not be null or stackpointer.
     */
    public void shl(int size, Register dst, Register src, Register shift) {
        super.lsl(size, dst, src, shift);
    }
 934 
 935     /**
 936      * dst = src >>> (shiftAmt & (size - 1)).
 937      *
 938      * @param size register size. Has to be 32 or 64.
 939      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 940      * @param src general purpose register. May not be null, stackpointer or zero-register.
 941      * @param shiftAmt amount by which src is shifted.
 942      */
 943     public void lshr(int size, Register dst, Register src, long shiftAmt) {
 944         int shift = clampShiftAmt(size, shiftAmt);
 945         super.ubfm(size, dst, src, shift, size - 1);
 946     }
 947 
    /**
     * dst = src >>> (shift & (size - 1)), with the shift amount taken from a register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src general purpose register. May not be null or stackpointer.
     * @param shift general purpose register. May not be null or stackpointer.
     */
    public void lshr(int size, Register dst, Register src, Register shift) {
        super.lsr(size, dst, src, shift);
    }
 959 
 960     /**
 961      * dst = src >> (shiftAmt & log2(size)).
 962      *
 963      * @param size register size. Has to be 32 or 64.
 964      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 965      * @param src general purpose register. May not be null, stackpointer or zero-register.
 966      * @param shiftAmt amount by which src is shifted.
 967      */
 968     public void ashr(int size, Register dst, Register src, long shiftAmt) {
 969         int shift = clampShiftAmt(size, shiftAmt);
 970         super.sbfm(size, dst, src, shift, size - 1);
 971     }
 972 
    /**
     * dst = src >> (shift & (size - 1)), with the shift amount taken from a register.
     * <p>
     * NOTE(review): the mask is written as (size - 1) to match the sibling shl/lshr register
     * variants; the masking itself is done by the delegated asr and is not visible here.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src general purpose register. May not be null or stackpointer.
     * @param shift general purpose register. May not be null or stackpointer.
     */
    public void ashr(int size, Register dst, Register src, Register shift) {
        super.asr(size, dst, src, shift);
    }
 984 
    /**
     * Clamps shiftAmt into range 0 <= shiftamt < size according to JLS.
     * <p>
     * The clamping is a bitwise AND with {@code size - 1}, so it yields the stated range only when
     * size is a power of two (the callers in this class document size as 32 or 64).
     *
     * @param size size of operation.
     * @param shiftAmt arbitrary shift amount.
     * @return value between 0 and size - 1 inclusive that is equivalent to shiftAmt according to
     *         JLS.
     */
    private static int clampShiftAmt(int size, long shiftAmt) {
        return (int) (shiftAmt & (size - 1));
    }
 996 
    /**
     * dst = src1 & src2.
     * <p>
     * Forwards to the shifted-register form with shift type LSL and shift amount 0.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     */
    public void and(int size, Register dst, Register src1, Register src2) {
        super.and(size, dst, src1, src2, ShiftType.LSL, 0);
    }
1008 
    /**
     * dst = src1 ^ src2.
     * <p>
     * Forwards to the shifted-register form with shift type LSL and shift amount 0.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     */
    public void eor(int size, Register dst, Register src1, Register src2) {
        super.eor(size, dst, src1, src2, ShiftType.LSL, 0);
    }
1020 
    /**
     * dst = src1 | src2.
     * <p>
     * Forwards to the shifted-register orr with shift type LSL and shift amount 0.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     */
    public void or(int size, Register dst, Register src1, Register src2) {
        super.orr(size, dst, src1, src2, ShiftType.LSL, 0);
    }
1032 
    /**
     * dst = src | bimm.
     * <p>
     * Forwards to the immediate form of orr; the immediate is passed on unchanged.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or zero-register.
     * @param src general purpose register. May not be null or stack-pointer.
     * @param bimm logical immediate. See {@link AArch64Assembler.LogicalImmediateTable} for exact
     *            definition.
     */
    public void or(int size, Register dst, Register src, long bimm) {
        super.orr(size, dst, src, bimm);
    }
1045 
    /**
     * dst = ~src.
     * <p>
     * Emitted as an or-not with the zero register as first operand and an unshifted (LSL 0) src as
     * second operand.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src general purpose register. May not be null or stackpointer.
     */
    public void not(int size, Register dst, Register src) {
        super.orn(size, dst, zr, src, ShiftType.LSL, 0);
    }
1056 
    /**
     * dst = src1 & shiftType(src2, shiftAmt).
     * <p>
     * This override only forwards its arguments unchanged to the assembler implementation.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     * @param shiftType all types allowed, may not be null.
     * @param shiftAmt must be in range 0 to size - 1.
     */
    @Override
    public void and(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
        super.and(size, dst, src1, src2, shiftType, shiftAmt);
    }
1071 
    /**
     * dst = src1 ^ shiftType(src2, shiftAmt).
     * <p>
     * This override only forwards its arguments unchanged to the assembler implementation.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     * @param shiftType all types allowed, may not be null.
     * @param shiftAmt must be in range 0 to size - 1.
     */
    @Override
    public void eor(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
        super.eor(size, dst, src1, src2, shiftType, shiftAmt);
    }
1086 
    /**
     * dst = src1 | shiftType(src2, shiftAmt).
     * <p>
     * Forwards its arguments unchanged to the assembler's shifted-register orr.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     * @param shiftType all types allowed, may not be null.
     * @param shiftAmt must be in range 0 to size - 1.
     */
    public void or(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
        super.orr(size, dst, src1, src2, shiftType, shiftAmt);
    }
1100 
    /**
     * dst = src1 & ~(shiftType(src2, shiftAmt)).
     * <p>
     * This override only forwards its arguments unchanged to the assembler implementation.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     * @param shiftType all types allowed, may not be null.
     * @param shiftAmt must be in range 0 to size - 1.
     */
    @Override
    public void bic(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
        super.bic(size, dst, src1, src2, shiftType, shiftAmt);
    }
1115 
    /**
     * dst = src1 ^ ~(shiftType(src2, shiftAmt)).
     * <p>
     * This override only forwards its arguments unchanged to the assembler implementation.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     * @param shiftType all types allowed, may not be null.
     * @param shiftAmt must be in range 0 to size - 1.
     */
    @Override
    public void eon(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
        super.eon(size, dst, src1, src2, shiftType, shiftAmt);
    }
1130 
    /**
     * dst = src1 | ~(shiftType(src2, shiftAmt)).
     * <p>
     * This override only forwards its arguments unchanged to the assembler implementation.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stackpointer.
     * @param src1 general purpose register. May not be null or stackpointer.
     * @param src2 general purpose register. May not be null or stackpointer.
     * @param shiftType all types allowed, may not be null.
     * @param shiftAmt must be in range 0 to size - 1.
     */
    @Override
    public void orn(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
        super.orn(size, dst, src1, src2, shiftType, shiftAmt);
    }
1145 
    /**
     * Sign-extend value from src into dst.
     * <p>
     * Emitted as a signed bitfield move covering bits 0 to srcSize - 1 of src.
     *
     * @param destSize destination register size. Must be 32 or 64.
     * @param srcSize source register size. Must be positive and smaller than destSize.
     * @param dst general purpose register. May not be null, stackpointer or zero-register.
     * @param src general purpose register. May not be null, stackpointer or zero-register.
     */
    public void sxt(int destSize, int srcSize, Register dst, Register src) {
        assert (srcSize < destSize && srcSize > 0);
        super.sbfm(destSize, dst, src, 0, srcSize - 1);
    }
1158 
    /**
     * dst = src if condition else -src.
     * <p>
     * NOTE(review): the assembler is called with src for both source operands and with the
     * <em>negated</em> condition. If the assembler's csneg selects its first source when its
     * condition holds and the negated second source otherwise, the effective result is
     * dst = -src if condition else src, which is the opposite of the summary above. Confirm
     * against the assembler and the callers.
     *
     * @param size register size. Must be 32 or 64.
     * @param dst general purpose register. May not be null or the stackpointer.
     * @param src general purpose register. May not be null or the stackpointer.
     * @param condition any condition except AV or NV. May not be null.
     */
    public void csneg(int size, Register dst, Register src, ConditionFlag condition) {
        super.csneg(size, dst, src, src, condition.negate());
    }
1170 
    /**
     * Tests a 64-bit logical immediate. Every table result other than
     * {@code Representable.NO} is accepted.
     *
     * @param imm immediate value to be tested.
     * @return True if the immediate can be used directly for logical 64-bit instructions.
     */
    public static boolean isLogicalImmediate(long imm) {
        return LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO;
    }
1177 
    /**
     * Tests a 32-bit logical immediate. Only the table result {@code Representable.YES} is
     * accepted.
     *
     * @param imm immediate value to be tested.
     * @return True if the immediate can be used directly for logical 32-bit instructions.
     */
    public static boolean isLogicalImmediate(int imm) {
        return LogicalImmediateTable.isRepresentable(imm) == LogicalImmediateTable.Representable.YES;
    }
1184 
1185     /* Float instructions */
1186 
1187     /**
1188      * Moves integer to float, float to integer, or float to float. Does not support integer to
1189      * integer moves.
1190      *
1191      * @param size register size. Has to be 32 or 64.
1192      * @param dst Either floating-point or general-purpose register. If general-purpose register may
1193      *            not be stackpointer or zero register. Cannot be null in any case.
1194      * @param src Either floating-point or general-purpose register. If general-purpose register may
1195      *            not be stackpointer. Cannot be null in any case.
1196      */
1197     @Override
1198     public void fmov(int size, Register dst, Register src) {
1199         assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU)) : "src and dst cannot both be integer registers.";
1200         if (dst.getRegisterCategory().equals(CPU)) {
1201             super.fmovFpu2Cpu(size, dst, src);
1202         } else if (src.getRegisterCategory().equals(CPU)) {
1203             super.fmovCpu2Fpu(size, dst, src);
1204         } else {
1205             super.fmov(size, dst, src);
1206         }
1207     }
1208 
1209     /**
1210      *
1211      * @param size register size. Has to be 32 or 64.
1212      * @param dst floating point register. May not be null.
1213      * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be
1214      *            loaded, i.e. (float) imm == imm must be true. In all cases
1215      *            {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true
1216      *            depending on size.
1217      */
1218     @Override
1219     public void fmov(int size, Register dst, double imm) {
1220         if (imm == 0.0) {
1221             assert Double.doubleToRawLongBits(imm) == 0L : "-0.0 is no valid immediate.";
1222             super.fmovCpu2Fpu(size, dst, zr);
1223         } else {
1224             super.fmov(size, dst, imm);
1225         }
1226     }
1227 
    /**
     * Tests whether a double can be loaded as an immediate. A value whose raw bit pattern is all
     * zeros (positive zero) is always accepted; everything else is decided by the assembler's
     * check.
     *
     * @param imm immediate value to be tested.
     * @return true if immediate can be loaded directly into floating-point register, false
     *         otherwise.
     */
    public static boolean isDoubleImmediate(double imm) {
        return Double.doubleToRawLongBits(imm) == 0L || AArch64Assembler.isDoubleImmediate(imm);
    }
1236 
    /**
     * Tests whether a float can be loaded as an immediate. A value whose raw bit pattern is all
     * zeros (positive zero) is always accepted; everything else is decided by the assembler's
     * check.
     *
     * @param imm immediate value to be tested.
     * @return true if immediate can be loaded directly into floating-point register, false
     *         otherwise.
     */
    public static boolean isFloatImmediate(float imm) {
        return Float.floatToRawIntBits(imm) == 0 || AArch64Assembler.isFloatImmediate(imm);
    }
1245 
    /**
     * Conditional move. result = trueValue if condition else falseValue.
     * <p>
     * Forwards its arguments unchanged to the assembler's fcsel.
     *
     * @param size register size.
     * @param result floating point register. May not be null.
     * @param trueValue floating point register. May not be null.
     * @param falseValue floating point register. May not be null.
     * @param condition every condition allowed. May not be null.
     */
    public void fcmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag condition) {
        super.fcsel(size, result, trueValue, falseValue, condition);
    }
1258 
    /**
     * dst = n % d.
     * <p>
     * NOTE(review): the quotient is written to dst by the first instruction, while n and d are
     * read again by the last one. If dst is the same register as n or d, that last read sees the
     * intermediate value instead of the original operand. Confirm that callers never alias dst
     * with an operand.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst floating-point register. May not be null.
     * @param n numerator. Floating-point register. May not be null.
     * @param d denominator. Floating-point register. May not be null.
     */
    public void frem(int size, Register dst, Register n, Register d) {
        // There is no frem instruction, instead we compute the remainder using the relation:
        // rem = n - Truncating(n / d) * d
        // Step 1: dst = n / d
        super.fdiv(size, dst, n, d);
        // Step 2: round the quotient in place (the relation above calls for truncation).
        super.frintz(size, dst, dst);
        // Step 3: combine the rounded quotient with d and n into the remainder.
        super.fmsub(size, dst, dst, d, n);
    }
1274 
1275     /* Branches */
1276 
    /**
     * Compares x and y and sets condition flags.
     * <p>
     * Emitted as a flag-setting subtraction of the unshifted (LSL 0) y from x whose destination is
     * the zero register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param x general purpose register. May not be null or stackpointer.
     * @param y general purpose register. May not be null or stackpointer.
     */
    public void cmp(int size, Register x, Register y) {
        assert size == 32 || size == 64;
        super.subs(size, zr, x, y, ShiftType.LSL, 0);
    }
1288 
1289     /**
1290      * Compares x to y and sets condition flags.
1291      *
1292      * @param size register size. Has to be 32 or 64.
1293      * @param x general purpose register. May not be null or stackpointer.
1294      * @param y comparison immediate, {@link #isComparisonImmediate(long)} has to be true for it.
1295      */
1296     public void cmp(int size, Register x, int y) {
1297         assert size == 32 || size == 64;
1298         if (y < 0) {
1299             super.adds(size, zr, x, -y);
1300         } else {
1301             super.subs(size, zr, x, y);
1302         }
1303     }
1304 
    /**
     * Sets condition flags according to result of x & y.
     * <p>
     * Forwards to the shifted-register ands with shift type LSL and shift amount 0, passing dst as
     * the destination register.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stack-pointer.
     * @param x general purpose register. May not be null or stackpointer.
     * @param y general purpose register. May not be null or stackpointer.
     */
    public void ands(int size, Register dst, Register x, Register y) {
        super.ands(size, dst, x, y, ShiftType.LSL, 0);
    }
1316 
    /**
     * Sets overflow flag according to result of x * y.
     * <p>
     * The product is also copied into dst (all 64 bits for size 64, the low 32 bits for size 32).
     * Two scratch registers are acquired for the duration of the sequence, although the 32-bit
     * path only uses the first one. For any size other than 32 or 64 the switch matches no case
     * and nothing is emitted.
     *
     * @param size register size. Has to be 32 or 64.
     * @param dst general purpose register. May not be null or stack-pointer.
     * @param x general purpose register. May not be null or stackpointer.
     * @param y general purpose register. May not be null or stackpointer.
     */
    public void mulvs(int size, Register dst, Register x, Register y) {
        try (ScratchRegister sc1 = getScratchRegister();
                        ScratchRegister sc2 = getScratchRegister()) {
            switch (size) {
                case 64: {
                    // Be careful with registers: it's possible that x, y, and dst are the same
                    // register. Both halves of the product are therefore computed into scratch
                    // registers before dst is written.
                    Register rscratch1 = sc1.getRegister();
                    Register rscratch2 = sc2.getRegister();
                    mul(64, rscratch1, x, y);     // Result bits 0..63
                    smulh(64, rscratch2, x, y);  // Result bits 64..127
                    // Top is pure sign ext: compare the high half against the low half shifted
                    // arithmetically right by 63. The result is discarded (zr); only flags matter.
                    subs(64, zr, rscratch2, rscratch1, ShiftType.ASR, 63);
                    // Copy all 64 bits of the result into dst
                    mov(64, dst, rscratch1);
                    mov(rscratch1, 0x80000000);
                    // Develop 0 (EQ), or 0x80000000 (NE)
                    cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE);
                    cmp(32, rscratch1, 1);
                    // 0x80000000 - 1 => VS
                    break;
                }
                case 32: {
                    Register rscratch1 = sc1.getRegister();
                    // Widening multiply with the zero register as addend.
                    smaddl(rscratch1, x, y, zr);
                    // Copy the low 32 bits of the result into dst
                    mov(32, dst, rscratch1);
                    // Compare the 64-bit product with its own sign-extended low word.
                    subs(64, zr, rscratch1, rscratch1, ExtendType.SXTW, 0);
                    // NE => overflow
                    mov(rscratch1, 0x80000000);
                    // Develop 0 (EQ), or 0x80000000 (NE)
                    cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE);
                    cmp(32, rscratch1, 1);
                    // 0x80000000 - 1 => VS
                    break;
                }
            }
        }
    }
1364 
1365     /**
1366      * When patching up Labels we have to know what kind of code to generate.
1367      */
1368     public enum PatchLabelKind {
1369         BRANCH_CONDITIONALLY(0x0),
1370         BRANCH_UNCONDITIONALLY(0x1),
1371         BRANCH_NONZERO(0x2),
1372         BRANCH_ZERO(0x3),
1373         BRANCH_BIT_NONZERO(0x4),
1374         BRANCH_BIT_ZERO(0x5),
1375         JUMP_ADDRESS(0x6),
1376         ADR(0x7);
1377 
1378         /**
1379          * Offset by which additional information for branch conditionally, branch zero and branch
1380          * non zero has to be shifted.
1381          */
1382         public static final int INFORMATION_OFFSET = 5;
1383 
1384         public final int encoding;
1385 
1386         PatchLabelKind(int encoding) {
1387             this.encoding = encoding;
1388         }
1389 
1390         /**
1391          * @return PatchLabelKind with given encoding.
1392          */
1393         private static PatchLabelKind fromEncoding(int encoding) {
1394             return values()[encoding & NumUtil.getNbitNumberInt(INFORMATION_OFFSET)];
1395         }
1396 
1397     }
1398 
1399     public void adr(Register dst, Label label) {
1400         // TODO Handle case where offset is too large for a single jump instruction
1401         if (label.isBound()) {
1402             int offset = label.position() - position();
1403             super.adr(dst, offset);
1404         } else {
1405             label.addPatchAt(position());
1406             // Encode condition flag so that we know how to patch the instruction later
1407             emitInt(PatchLabelKind.ADR.encoding | dst.encoding << PatchLabelKind.INFORMATION_OFFSET);
1408         }
1409     }
1410 
1411     /**
1412      * Compare register and branch if non-zero.
1413      *
1414      * @param size Instruction size in bits. Should be either 32 or 64.
1415      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1416      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1417      */
1418     public void cbnz(int size, Register cmp, Label label) {
1419         // TODO Handle case where offset is too large for a single jump instruction
1420         if (label.isBound()) {
1421             int offset = label.position() - position();
1422             super.cbnz(size, cmp, offset);
1423         } else {
1424             label.addPatchAt(position());
1425             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1426             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1427             // Encode condition flag so that we know how to patch the instruction later
1428             emitInt(PatchLabelKind.BRANCH_NONZERO.encoding | regEncoding | sizeEncoding);
1429         }
1430     }
1431 
1432     /**
1433      * Compare register and branch if zero.
1434      *
1435      * @param size Instruction size in bits. Should be either 32 or 64.
1436      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1437      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1438      */
1439     public void cbz(int size, Register cmp, Label label) {
1440         // TODO Handle case where offset is too large for a single jump instruction
1441         if (label.isBound()) {
1442             int offset = label.position() - position();
1443             super.cbz(size, cmp, offset);
1444         } else {
1445             label.addPatchAt(position());
1446             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1447             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1448             // Encode condition flag so that we know how to patch the instruction later
1449             emitInt(PatchLabelKind.BRANCH_ZERO.encoding | regEncoding | sizeEncoding);
1450         }
1451     }
1452 
1453     /**
1454      * Test a single bit and branch if the bit is nonzero.
1455      *
1456      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1457      * @param uimm6 Unsigned 6-bit bit index.
1458      * @param label Can only handle 16-bit word-aligned offsets for now. May be unbound. Non null.
1459      */
1460     public void tbnz(Register cmp, int uimm6, Label label) {
1461         assert NumUtil.isUnsignedNbit(6, uimm6);
1462         if (label.isBound()) {
1463             int offset = label.position() - position();
1464             super.tbnz(cmp, uimm6, offset);
1465         } else {
1466             label.addPatchAt(position());
1467             int indexEncoding = uimm6 << PatchLabelKind.INFORMATION_OFFSET;
1468             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 6);
1469             emitInt(PatchLabelKind.BRANCH_BIT_NONZERO.encoding | indexEncoding | regEncoding);
1470         }
1471     }
1472 
1473     /**
1474      * Test a single bit and branch if the bit is zero.
1475      *
1476      * @param cmp general purpose register. May not be null, zero-register or stackpointer.
1477      * @param uimm6 Unsigned 6-bit bit index.
1478      * @param label Can only handle 16-bit word-aligned offsets for now. May be unbound. Non null.
1479      */
1480     public void tbz(Register cmp, int uimm6, Label label) {
1481         assert NumUtil.isUnsignedNbit(6, uimm6);
1482         if (label.isBound()) {
1483             int offset = label.position() - position();
1484             super.tbz(cmp, uimm6, offset);
1485         } else {
1486             label.addPatchAt(position());
1487             int indexEncoding = uimm6 << PatchLabelKind.INFORMATION_OFFSET;
1488             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 6);
1489             emitInt(PatchLabelKind.BRANCH_BIT_ZERO.encoding | indexEncoding | regEncoding);
1490         }
1491     }
1492 
1493     /**
1494      * Branches to label if condition is true.
1495      *
1496      * @param condition any condition value allowed. Non null.
1497      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1498      */
1499     public void branchConditionally(ConditionFlag condition, Label label) {
1500         // TODO Handle case where offset is too large for a single jump instruction
1501         if (label.isBound()) {
1502             int offset = label.position() - position();
1503             super.b(condition, offset);
1504         } else {
1505             label.addPatchAt(position());
1506             // Encode condition flag so that we know how to patch the instruction later
1507             emitInt(PatchLabelKind.BRANCH_CONDITIONALLY.encoding | condition.encoding << PatchLabelKind.INFORMATION_OFFSET);
1508         }
1509     }
1510 
1511     /**
1512      * Branches if condition is true. Address of jump is patched up by HotSpot c++ code.
1513      *
1514      * @param condition any condition value allowed. Non null.
1515      */
1516     public void branchConditionally(ConditionFlag condition) {
1517         // Correct offset is fixed up by HotSpot later.
1518         super.b(condition, 0);
1519     }
1520 
1521     /**
1522      * Jumps to label.
1523      *
1524      * param label Can only handle signed 28-bit offsets. May be unbound. Non null.
1525      */
1526     @Override
1527     public void jmp(Label label) {
1528         // TODO Handle case where offset is too large for a single jump instruction
1529         if (label.isBound()) {
1530             int offset = label.position() - position();
1531             super.b(offset);
1532         } else {
1533             label.addPatchAt(position());
1534             emitInt(PatchLabelKind.BRANCH_UNCONDITIONALLY.encoding);
1535         }
1536     }
1537 
    /**
     * Jumps to the address held in dest by emitting a {@code br} instruction.
     *
     * @param dest General purpose register. May not be null, zero-register or stackpointer.
     */
    public void jmp(Register dest) {
        super.br(dest);
    }
1546 
1547     /**
1548      * Immediate jump instruction fixed up by HotSpot c++ code.
1549      */
1550     public void jmp() {
1551         // Offset has to be fixed up by c++ code.
1552         super.b(0);
1553     }
1554 
1555     /**
1556      *
1557      * @return true if immediate offset can be used in a single branch instruction.
1558      */
1559     public static boolean isBranchImmediateOffset(long imm) {
1560         return NumUtil.isSignedNbit(28, imm);
1561     }
1562 
1563     /* system instructions */
1564 
1565     /**
1566      * Exception codes used when calling hlt instruction.
1567      */
1568     public enum AArch64ExceptionCode {
1569         NO_SWITCH_TARGET(0x0),
1570         BREAKPOINT(0x1);
1571 
1572         public final int encoding;
1573 
1574         AArch64ExceptionCode(int encoding) {
1575             this.encoding = encoding;
1576         }
1577     }
1578 
1579     /**
1580      * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
1581      * UNALLOCATED instruction.
1582      *
1583      * @param exceptionCode exception code specifying why halt was called. Non null.
1584      */
1585     public void hlt(AArch64ExceptionCode exceptionCode) {
1586         super.hlt(exceptionCode.encoding);
1587     }
1588 
1589     /**
1590      * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
1591      * exception level.
1592      *
1593      * @param exceptionCode exception code specifying why break was called. Non null.
1594      */
1595     public void brk(AArch64ExceptionCode exceptionCode) {
1596         super.brk(exceptionCode.encoding);
1597     }
1598 
    /**
     * Not implemented in this assembler: every call throws the error produced by
     * {@code GraalError.unimplemented()}.
     */
    public void pause() {
        throw GraalError.unimplemented();
    }
1602 
    /**
     * Executes no-op instruction. No registers or flags are updated, except for PC.
     */
    public void nop() {
        // Emitted through the generic hint instruction with the NOP hint.
        super.hint(SystemHint.NOP);
    }
1609 
    /**
     * Consumption of Speculative Data Barrier. This is a memory barrier that controls speculative
     * execution and data value prediction.
     */
    public void csdb() {
        // Emitted through the generic hint instruction with the CSDB hint.
        super.hint(SystemHint.CSDB);
    }
1617 
    /**
     * Emits a single {@link #nop()}, which advances the current code position. Same as
     * {@link #nop()}.
     */
    @Override
    public void ensureUniquePC() {
        nop();
    }
1625 
1626     /**
1627      * Aligns PC.
1628      *
1629      * @param modulus Has to be positive multiple of 4.
1630      */
1631     @Override
1632     public void align(int modulus) {
1633         assert modulus > 0 && (modulus & 0x3) == 0 : "Modulus has to be a positive multiple of 4.";
1634         if (position() % modulus == 0) {
1635             return;
1636         }
1637         int offset = modulus - position() % modulus;
1638         for (int i = 0; i < offset; i += 4) {
1639             nop();
1640         }
1641     }
1642 
1643     /**
1644      * Patches jump targets when label gets bound.
1645      */
1646     @Override
1647     protected void patchJumpTarget(int branch, int jumpTarget) {
1648         int instruction = getInt(branch);
1649         int branchOffset = jumpTarget - branch;
1650         PatchLabelKind type = PatchLabelKind.fromEncoding(instruction);
1651         switch (type) {
1652             case BRANCH_CONDITIONALLY:
1653                 ConditionFlag cf = ConditionFlag.fromEncoding(instruction >>> PatchLabelKind.INFORMATION_OFFSET);
1654                 super.b(cf, branchOffset, branch);
1655                 break;
1656             case BRANCH_UNCONDITIONALLY:
1657                 super.b(branchOffset, branch);
1658                 break;
1659             case JUMP_ADDRESS:
1660                 int offset = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1661                 emitInt(jumpTarget - offset, branch);
1662                 break;
1663             case BRANCH_NONZERO:
1664             case BRANCH_ZERO: {
1665                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1666                 int sizeEncoding = information & 1;
1667                 int regEncoding = information >>> 1;
1668                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1669                 // 1 => 64; 0 => 32
1670                 int size = sizeEncoding * 32 + 32;
1671                 switch (type) {
1672                     case BRANCH_NONZERO:
1673                         super.cbnz(size, reg, branchOffset, branch);
1674                         break;
1675                     case BRANCH_ZERO:
1676                         super.cbz(size, reg, branchOffset, branch);
1677                         break;
1678                 }
1679                 break;
1680             }
1681             case BRANCH_BIT_NONZERO:
1682             case BRANCH_BIT_ZERO: {
1683                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1684                 int sizeEncoding = information & NumUtil.getNbitNumberInt(6);
1685                 int regEncoding = information >>> 6;
1686                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1687                 if (!NumUtil.isSignedNbit(16, branchOffset)) {
1688                     throw new BranchTargetOutOfBoundsException(true, "Branch target %d out of bounds", branchOffset);
1689                 }
1690                 switch (type) {
1691                     case BRANCH_BIT_NONZERO:
1692                         super.tbnz(reg, sizeEncoding, branchOffset, branch);
1693                         break;
1694                     case BRANCH_BIT_ZERO:
1695                         super.tbz(reg, sizeEncoding, branchOffset, branch);
1696                         break;
1697                 }
1698                 break;
1699             }
1700             case ADR: {
1701                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1702                 int regEncoding = information;
1703                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1704                 super.adr(reg, branchOffset, branch);
1705                 break;
1706             }
1707             default:
1708                 throw GraalError.shouldNotReachHere();
1709         }
1710     }
1711 
1712     /**
1713      * Generates an address of the form {@code base + displacement}.
1714      *
1715      * Does not change base register to fulfill this requirement. Will fail if displacement cannot
1716      * be represented directly as address.
1717      *
1718      * @param base general purpose register. May not be null or the zero register.
1719      * @param displacement arbitrary displacement added to base.
1720      * @return AArch64Address referencing memory at {@code base + displacement}.
1721      */
1722     @Override
1723     public AArch64Address makeAddress(Register base, int displacement) {
1724         return makeAddress(base, displacement, zr, /* signExtend */false, /* transferSize */0, zr, /* allowOverwrite */false);
1725     }
1726 
    /**
     * Returns the shared {@code AArch64Address.PLACEHOLDER} address.
     *
     * @param instructionStartPosition not used by this implementation.
     * @return {@code AArch64Address.PLACEHOLDER}.
     */
    @Override
    public AArch64Address getPlaceholder(int instructionStartPosition) {
        return AArch64Address.PLACEHOLDER;
    }
1731 
1732     public void addressOf(Register dst) {
1733         // This will be fixed up later.
1734         super.adrp(dst);
1735         super.add(64, dst, dst, 0);
1736     }
1737 
    /**
     * Loads an address into Register d.
     *
     * The work is delegated to {@code AArch64Address.lea}, which is handed this assembler and the
     * destination register.
     *
     * @param d general purpose register. May not be null.
     * @param a AArch64Address the address of an operand.
     */
    public void lea(Register d, AArch64Address a) {
        a.lea(this, d);
    }
1747 }