1 /*
   2  * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 
  26 package org.graalvm.compiler.asm.aarch64;
  27 
  28 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.BASE_REGISTER_ONLY;
  29 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.EXTENDED_REGISTER_OFFSET;
  30 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SCALED;
  31 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_UNSCALED;
  32 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.REGISTER_OFFSET;
  33 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_BASE;
  34 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_INDEX;
  35 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.NO_WORK;
  36 import static jdk.vm.ci.aarch64.AArch64.CPU;
  37 import static jdk.vm.ci.aarch64.AArch64.r8;
  38 import static jdk.vm.ci.aarch64.AArch64.r9;
  39 import static jdk.vm.ci.aarch64.AArch64.sp;
  40 import static jdk.vm.ci.aarch64.AArch64.zr;
  41 
  42 import org.graalvm.compiler.asm.Label;
  43 import org.graalvm.compiler.core.common.NumUtil;
  44 import org.graalvm.compiler.debug.GraalError;
  45 
  46 import jdk.vm.ci.aarch64.AArch64;
  47 import jdk.vm.ci.code.Register;
  48 import jdk.vm.ci.code.TargetDescription;
  49 
  50 public class AArch64MacroAssembler extends AArch64Assembler {
  51 
  52     private final ScratchRegister[] scratchRegister = new ScratchRegister[]{new ScratchRegister(r8), new ScratchRegister(r9)};
  53 
  54     // Points to the next free scratch register
  55     private int nextFreeScratchRegister = 0;
  56 
  57     public AArch64MacroAssembler(TargetDescription target) {
  58         super(target);
  59     }
  60 
  61     public class ScratchRegister implements AutoCloseable {
  62         private final Register register;
  63 
  64         public ScratchRegister(Register register) {
  65             this.register = register;
  66         }
  67 
  68         public Register getRegister() {
  69             return register;
  70         }
  71 
  72         @Override
  73         public void close() {
  74             assert nextFreeScratchRegister > 0 : "Close called too often";
  75             nextFreeScratchRegister--;
  76         }
  77     }
  78 
  79     public ScratchRegister getScratchRegister() {
  80         return scratchRegister[nextFreeScratchRegister++];
  81     }
  82 
  83     /**
  84      * Specifies what actions have to be taken to turn an arbitrary address of the form
  85      * {@code base + displacement [+ index [<< scale]]} into a valid AArch64Address.
  86      */
  87     public static class AddressGenerationPlan {
  88         public final WorkPlan workPlan;
  89         public final AArch64Address.AddressingMode addressingMode;
  90         public final boolean needsScratch;
  91 
  92         public enum WorkPlan {
  93             /**
  94              * Can be used as-is without extra work.
  95              */
  96             NO_WORK,
  97             /**
  98              * Add scaled displacement to index register.
  99              */
 100             ADD_TO_INDEX,
 101             /**
 102              * Add unscaled displacement to base register.
 103              */
 104             ADD_TO_BASE,
 105         }
 106 
 107         /**
 108          * @param workPlan Work necessary to generate a valid address.
 109          * @param addressingMode Addressing mode of generated address.
         * @param needsScratch True if generating the address needs a scratch register, false otherwise.
 111          */
 112         public AddressGenerationPlan(WorkPlan workPlan, AArch64Address.AddressingMode addressingMode, boolean needsScratch) {
 113             this.workPlan = workPlan;
 114             this.addressingMode = addressingMode;
 115             this.needsScratch = needsScratch;
 116         }
 117     }
 118 
 119     /**
     * Generates an address generation plan for an address of the form
 121      * {@code base + displacement [+ index [<< log2(transferSize)]]} with the index register and
 122      * scaling being optional.
 123      *
 124      * @param displacement an arbitrary displacement.
     * @param hasIndexRegister true if the address uses an index register, false otherwise.
 126      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 127      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 128      * @return AddressGenerationPlan that specifies the actions necessary to generate a valid
 129      *         AArch64Address for the given parameters.
 130      */
 131     public static AddressGenerationPlan generateAddressPlan(long displacement, boolean hasIndexRegister, int transferSize) {
 132         assert transferSize == 0 || transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 133         boolean indexScaled = transferSize != 0;
 134         int log2Scale = NumUtil.log2Ceil(transferSize);
 135         long scaledDisplacement = displacement >> log2Scale;
 136         boolean displacementScalable = indexScaled && (displacement & (transferSize - 1)) == 0;
 137         if (displacement == 0) {
 138             // register offset without any work beforehand.
 139             return new AddressGenerationPlan(NO_WORK, REGISTER_OFFSET, false);
 140         } else {
 141             if (hasIndexRegister) {
 142                 if (displacementScalable) {
 143                     boolean needsScratch = !isArithmeticImmediate(scaledDisplacement);
 144                     return new AddressGenerationPlan(ADD_TO_INDEX, REGISTER_OFFSET, needsScratch);
 145                 } else {
 146                     boolean needsScratch = !isArithmeticImmediate(displacement);
 147                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 148                 }
 149             } else {
 150                 if (displacementScalable && NumUtil.isUnsignedNbit(12, scaledDisplacement)) {
 151                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_SCALED, false);
 152                 } else if (NumUtil.isSignedNbit(9, displacement)) {
 153                     return new AddressGenerationPlan(NO_WORK, IMMEDIATE_UNSCALED, false);
 154                 } else {
 155                     boolean needsScratch = !isArithmeticImmediate(displacement);
 156                     return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch);
 157                 }
 158             }
 159         }
 160     }
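
    // Illustrative summary (added for documentation only; derived from the cases above), assuming no
    // index register and transferSize == 8:
    //   displacement 0       -> NO_WORK, REGISTER_OFFSET (collapses to BASE_REGISTER_ONLY later)
    //   displacement 4096    -> NO_WORK, IMMEDIATE_SCALED (4096 / 8 = 512 fits in unsigned 12 bits)
    //   displacement 3       -> NO_WORK, IMMEDIATE_UNSCALED (not divisible by 8, fits in signed 9 bits)
    //   displacement 1 << 20 -> ADD_TO_BASE, REGISTER_OFFSET (too large for either immediate form)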
 161 
 162     /**
 163      * Returns an AArch64Address pointing to
 164      * {@code base + displacement + index << log2(transferSize)}.
 165      *
 166      * @param base general purpose register. May not be null or the zero register.
 167      * @param displacement arbitrary displacement added to base.
 168      * @param index general purpose register. May not be null or the stack pointer.
     * @param signExtendIndex if true, the index register is treated as a 32-bit word register that
     *            is sign-extended before being added.
 171      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 172      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 173      * @param additionalReg additional register used either as a scratch register or as part of the
 174      *            final address, depending on whether allowOverwrite is true or not. May not be null
 175      *            or stackpointer.
     * @param allowOverwrite if true, the base or index register may be overwritten in order to
     *            generate the address.
 178      * @return AArch64Address pointing to memory at
 179      *         {@code base + displacement + index << log2(transferSize)}.
 180      */
 181     public AArch64Address makeAddress(Register base, long displacement, Register index, boolean signExtendIndex, int transferSize, Register additionalReg, boolean allowOverwrite) {
 182         AddressGenerationPlan plan = generateAddressPlan(displacement, !index.equals(zr), transferSize);
 183         assert allowOverwrite || !zr.equals(additionalReg) || plan.workPlan == NO_WORK;
 184         assert !plan.needsScratch || !zr.equals(additionalReg);
 185         int log2Scale = NumUtil.log2Ceil(transferSize);
 186         long scaledDisplacement = displacement >> log2Scale;
 187         Register newIndex = index;
 188         Register newBase = base;
 189         int immediate;
 190         switch (plan.workPlan) {
 191             case NO_WORK:
 192                 if (plan.addressingMode == IMMEDIATE_SCALED) {
 193                     immediate = (int) scaledDisplacement;
 194                 } else {
 195                     immediate = (int) displacement;
 196                 }
 197                 break;
 198             case ADD_TO_INDEX:
 199                 newIndex = allowOverwrite ? index : additionalReg;
 200                 assert !newIndex.equals(sp) && !newIndex.equals(zr);
 201                 if (plan.needsScratch) {
 202                     mov(additionalReg, scaledDisplacement);
 203                     add(signExtendIndex ? 32 : 64, newIndex, index, additionalReg);
 204                 } else {
 205                     add(signExtendIndex ? 32 : 64, newIndex, index, (int) scaledDisplacement);
 206                 }
 207                 immediate = 0;
 208                 break;
 209             case ADD_TO_BASE:
 210                 newBase = allowOverwrite ? base : additionalReg;
 211                 assert !newBase.equals(sp) && !newBase.equals(zr);
 212                 if (plan.needsScratch) {
 213                     mov(additionalReg, displacement);
 214                     add(64, newBase, base, additionalReg);
 215                 } else {
 216                     add(64, newBase, base, (int) displacement);
 217                 }
 218                 immediate = 0;
 219                 break;
 220             default:
 221                 throw GraalError.shouldNotReachHere();
 222         }
 223         AArch64Address.AddressingMode addressingMode = plan.addressingMode;
 224         ExtendType extendType = null;
 225         if (addressingMode == REGISTER_OFFSET) {
 226             if (newIndex.equals(zr)) {
 227                 addressingMode = BASE_REGISTER_ONLY;
 228             } else if (signExtendIndex) {
 229                 addressingMode = EXTENDED_REGISTER_OFFSET;
 230                 extendType = ExtendType.SXTW;
 231             }
 232         }
 233         return AArch64Address.createAddress(addressingMode, newBase, newIndex, immediate, transferSize != 0, extendType);
 234     }
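
    // A worked example (added for illustration; x1, x2 and x3 stand for arbitrary general purpose
    // registers): makeAddress(x1, 1 << 20, x2, false, 8, x3, false) produces an ADD_TO_INDEX plan,
    // so the scaled displacement (1 << 20) / 8 is first folded into the spare register, roughly
    //   add x3, x2, #0x20000
    // and the returned address is the scaled register-offset form [x1, x3, lsl #3].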
 235 
 236     /**
 237      * Returns an AArch64Address pointing to {@code base + displacement}. Specifies the memory
 238      * transfer size to allow some optimizations when building the address.
 239      *
 240      * @param base general purpose register. May not be null or the zero register.
 241      * @param displacement arbitrary displacement added to base.
 242      * @param transferSize the memory transfer size in bytes.
 243      * @param additionalReg additional register used either as a scratch register or as part of the
 244      *            final address, depending on whether allowOverwrite is true or not. May not be
 245      *            null, zero register or stackpointer.
     * @param allowOverwrite if true, the base or index register may be overwritten in order to
     *            generate the address.
 248      * @return AArch64Address pointing to memory at {@code base + displacement}.
 249      */
 250     public AArch64Address makeAddress(Register base, long displacement, Register additionalReg, int transferSize, boolean allowOverwrite) {
 251         assert additionalReg.getRegisterCategory().equals(CPU);
 252         return makeAddress(base, displacement, zr, /* sign-extend */false, transferSize, additionalReg, allowOverwrite);
 253     }
 254 
 255     /**
 256      * Returns an AArch64Address pointing to {@code base + displacement}. Fails if address cannot be
 257      * represented without overwriting base register or using a scratch register.
 258      *
 259      * @param base general purpose register. May not be null or the zero register.
 260      * @param displacement arbitrary displacement added to base.
 261      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 262      *            the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8.
 263      * @return AArch64Address pointing to memory at {@code base + displacement}.
 264      */
 265     public AArch64Address makeAddress(Register base, long displacement, int transferSize) {
 266         return makeAddress(base, displacement, zr, /* signExtend */false, transferSize, zr, /* allowOverwrite */false);
 267     }
 268 
 269     /**
 270      * Loads memory address into register.
 271      *
 272      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 273      * @param address address whose value is loaded into dst. May not be null,
 274      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_POST_INDEXED
 275      *            POST_INDEXED} or
 276      *            {@link org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode#IMMEDIATE_PRE_INDEXED
 277      *            IMMEDIATE_PRE_INDEXED}
 278      * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much
 279      *            the index register is scaled. Can be 1, 2, 4 or 8.
 280      */
 281     public void loadAddress(Register dst, AArch64Address address, int transferSize) {
 282         assert transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8;
 283         assert dst.getRegisterCategory().equals(CPU);
 284         int shiftAmt = NumUtil.log2Ceil(transferSize);
 285         switch (address.getAddressingMode()) {
 286             case IMMEDIATE_SCALED:
 287                 int scaledImmediate = address.getImmediateRaw() << shiftAmt;
 288                 int lowerBits = scaledImmediate & NumUtil.getNbitNumberInt(12);
 289                 int higherBits = scaledImmediate & ~NumUtil.getNbitNumberInt(12);
 290                 boolean firstAdd = true;
 291                 if (lowerBits != 0) {
 292                     add(64, dst, address.getBase(), lowerBits);
 293                     firstAdd = false;
 294                 }
 295                 if (higherBits != 0) {
 296                     Register src = firstAdd ? address.getBase() : dst;
 297                     add(64, dst, src, higherBits);
 298                 }
 299                 break;
 300             case IMMEDIATE_UNSCALED:
 301                 int immediate = address.getImmediateRaw();
 302                 add(64, dst, address.getBase(), immediate);
 303                 break;
 304             case REGISTER_OFFSET:
 305                 add(64, dst, address.getBase(), address.getOffset(), ShiftType.LSL, address.isScaled() ? shiftAmt : 0);
 306                 break;
 307             case EXTENDED_REGISTER_OFFSET:
 308                 add(64, dst, address.getBase(), address.getOffset(), address.getExtendType(), address.isScaled() ? shiftAmt : 0);
 309                 break;
 310             case PC_LITERAL: {
 311                 addressOf(dst);
 312                 break;
 313             }
 314             case BASE_REGISTER_ONLY:
 315                 movx(dst, address.getBase());
 316                 break;
 317             default:
 318                 throw GraalError.shouldNotReachHere();
 319         }
 320     }
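
    // Worked example for the IMMEDIATE_SCALED case above (illustrative only): with a transfer size
    // of 8 and a raw (already scaled) immediate of 0x208, the effective offset is 0x208 << 3 = 0x1040,
    // which is split into a low 12-bit part and the remainder:
    //   add dst, base, #0x40
    //   add dst, dst, #0x1000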
 321 
 322     public void movx(Register dst, Register src) {
 323         mov(64, dst, src);
 324     }
 325 
 326     public void mov(int size, Register dst, Register src) {
 327         if (dst.equals(sp) || src.equals(sp)) {
 328             add(size, dst, src, 0);
 329         } else {
 330             or(size, dst, zr, src);
 331         }
 332     }
 333 
 334     /**
 335      * Generates a 64-bit immediate move code sequence.
 336      *
 337      * @param dst general purpose register. May not be null, stackpointer or zero-register.
     * @param imm the 64-bit immediate to load into dst.
 339      */
 340     private void mov64(Register dst, long imm) {
        // We have to move all non-zero parts of the immediate in 16-bit chunks.
 342         boolean firstMove = true;
 343         for (int offset = 0; offset < 64; offset += 16) {
 344             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 345             if (chunk == 0) {
 346                 continue;
 347             }
 348             if (firstMove) {
 349                 movz(64, dst, chunk, offset);
 350                 firstMove = false;
 351             } else {
 352                 movk(64, dst, chunk, offset);
 353             }
 354         }
 355         assert !firstMove;
 356     }
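
    // Example sequence (for documentation only; x0 stands for an arbitrary dst):
    // mov64(x0, 0x0000_dead_0000_beefL) skips the two all-zero 16-bit chunks and emits
    //   movz x0, #0xbeef, lsl #0
    //   movk x0, #0xdead, lsl #32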
 357 
 358     /**
 359      * Loads immediate into register.
 360      *
 361      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 362      * @param imm immediate loaded into register.
 363      */
 364     public void mov(Register dst, long imm) {
 365         assert dst.getRegisterCategory().equals(CPU);
 366         if (imm == 0L) {
 367             movx(dst, zr);
 368         } else if (LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 369             or(64, dst, zr, imm);
 370         } else if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
            // If the upper 32 bits are all 1s, the sign bit of the lower 32 bits is set *and* the
            // lower 32 bits are representable as a logical immediate, we can materialize the lower
            // 32 bits and then sign-extend them. This allows us to cover immediates like ~1L with
            // 2 instructions.
 375             mov(dst, (int) imm);
 376             sxt(64, 32, dst, dst);
 377         } else {
 378             mov64(dst, imm);
 379         }
 380     }
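
    // Example (illustrative): mov(dst, ~1L) takes the sign-extension path above and needs only two
    // instructions, roughly
    //   orr dst, xzr, #0xfffffffe   (materialize the low 32 bits as a logical immediate)
    //   sbfm dst, dst, #0, #31      (i.e. sxtw: sign-extend them to 64 bits)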
 381 
 382     /**
 383      * Loads immediate into register.
 384      *
 385      * @param dst general purpose register. May not be null, zero-register or stackpointer.
 386      * @param imm immediate loaded into register.
 387      */
 388     public void mov(Register dst, int imm) {
 389         mov(dst, imm & 0xFFFF_FFFFL);
 390     }
 391 
 392     /**
 393      * Generates a 48-bit immediate move code sequence. The immediate may later be updated by
 394      * HotSpot.
 395      *
     * In AArch64 mode the virtual address space is 48 bits in size, so we only need three
 397      * instructions to create a patchable instruction sequence that can reach anywhere.
 398      *
 399      * @param dst general purpose register. May not be null, stackpointer or zero-register.
     * @param imm the native address to load into dst. Must fit into 48 bits.
 401      */
 402     public void movNativeAddress(Register dst, long imm) {
 403         assert (imm & 0xFFFF_0000_0000_0000L) == 0;
        // Move the address in 16-bit chunks. Zero chunks are emitted as well, so the sequence
        // always has the same length and can be patched later.
 405         boolean firstMove = true;
 406         for (int offset = 0; offset < 48; offset += 16) {
 407             int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 408             if (firstMove) {
 409                 movz(64, dst, chunk, offset);
 410                 firstMove = false;
 411             } else {
 412                 movk(64, dst, chunk, offset);
 413             }
 414         }
 415         assert !firstMove;
 416     }
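
    // Example (for illustration only): movNativeAddress(dst, 0x0000_7f12_3456_7890L) always emits
    // exactly three instructions, even if a 16-bit chunk happens to be zero, so the sequence can be
    // patched in place later:
    //   movz dst, #0x7890, lsl #0
    //   movk dst, #0x3456, lsl #16
    //   movk dst, #0x7f12, lsl #32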
 417 
 418     /**
 419      * Generates a 32-bit immediate move code sequence. The immediate may later be updated by
 420      * HotSpot.
 421      *
 422      * @param dst general purpose register. May not be null, stackpointer or zero-register.
     * @param imm the address to load into dst. Must fit into 32 bits.
 424      */
 425     public void movNarrowAddress(Register dst, long imm) {
 426         assert (imm & 0xFFFF_FFFF_0000_0000L) == 0;
 427         movz(64, dst, (int) (imm >>> 16), 16);
 428         movk(64, dst, (int) (imm & 0xffff), 0);
 429     }
 430 
 431     /**
 432      * @return Number of instructions necessary to load immediate into register.
 433      */
 434     public static int nrInstructionsToMoveImmediate(long imm) {
 435         if (imm == 0L || LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) {
 436             return 1;
 437         }
 438         if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) {
            // If the upper 32 bits are all 1s, the sign bit of the lower 32 bits is set *and* the
            // lower 32 bits are representable as a logical immediate, we can materialize the lower
            // 32 bits and then sign-extend them. This allows us to cover immediates like ~1L with
            // 2 instructions.
 443             return 2;
 444         }
 445         int nrInstructions = 0;
 446         for (int offset = 0; offset < 64; offset += 16) {
 447             int part = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16);
 448             if (part != 0) {
 449                 nrInstructions++;
 450             }
 451         }
 452         return nrInstructions;
 453     }
 454 
 455     /**
 456      * Loads a srcSize value from address into rt sign-extending it if necessary.
 457      *
 458      * @param targetSize size of target register in bits. Must be 32 or 64.
     * @param srcSize size of memory read in bits. Must be 8, 16 or 32, and smaller than or equal to
 460      *            targetSize.
 461      * @param rt general purpose register. May not be null or stackpointer.
 462      * @param address all addressing modes allowed. May not be null.
 463      */
 464     @Override
 465     public void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
 466         assert targetSize == 32 || targetSize == 64;
 467         assert srcSize <= targetSize;
 468         if (targetSize == srcSize) {
 469             super.ldr(srcSize, rt, address);
 470         } else {
 471             super.ldrs(targetSize, srcSize, rt, address);
 472         }
 473     }
 474 
 475     /**
 476      * Loads a srcSize value from address into rt zero-extending it if necessary.
 477      *
     * @param srcSize size of memory read in bits. Must be 8, 16 or 32, and smaller than or equal to
 479      *            targetSize.
 480      * @param rt general purpose register. May not be null or stackpointer.
 481      * @param address all addressing modes allowed. May not be null.
 482      */
 483     @Override
 484     public void ldr(int srcSize, Register rt, AArch64Address address) {
 485         super.ldr(srcSize, rt, address);
 486     }
 487 
 488     /**
     * Conditional move. result = trueValue if condition else falseValue.
 490      *
 491      * @param size register size. Has to be 32 or 64.
 492      * @param result general purpose register. May not be null or the stackpointer.
 493      * @param trueValue general purpose register. May not be null or the stackpointer.
 494      * @param falseValue general purpose register. May not be null or the stackpointer.
 495      * @param cond any condition flag. May not be null.
 496      */
 497     public void cmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag cond) {
 498         super.csel(size, result, trueValue, falseValue, cond);
 499     }
 500 
 501     /**
 502      * Conditional set. dst = 1 if condition else 0.
 503      *
 504      * @param dst general purpose register. May not be null or stackpointer.
 505      * @param condition any condition. May not be null.
 506      */
 507     public void cset(Register dst, ConditionFlag condition) {
 508         super.csinc(32, dst, zr, zr, condition.negate());
 509     }
 510 
 511     /**
 512      * dst = src1 + src2.
 513      *
 514      * @param size register size. Has to be 32 or 64.
 515      * @param dst general purpose register. May not be null.
 516      * @param src1 general purpose register. May not be null.
 517      * @param src2 general purpose register. May not be null or stackpointer.
 518      */
 519     public void add(int size, Register dst, Register src1, Register src2) {
 520         if (dst.equals(sp) || src1.equals(sp)) {
 521             super.add(size, dst, src1, src2, ExtendType.UXTX, 0);
 522         } else {
 523             super.add(size, dst, src1, src2, ShiftType.LSL, 0);
 524         }
 525     }
 526 
 527     /**
 528      * dst = src1 + src2 and sets condition flags.
 529      *
 530      * @param size register size. Has to be 32 or 64.
 531      * @param dst general purpose register. May not be null.
 532      * @param src1 general purpose register. May not be null.
 533      * @param src2 general purpose register. May not be null or stackpointer.
 534      */
 535     public void adds(int size, Register dst, Register src1, Register src2) {
 536         if (dst.equals(sp) || src1.equals(sp)) {
 537             super.adds(size, dst, src1, src2, ExtendType.UXTX, 0);
 538         } else {
 539             super.adds(size, dst, src1, src2, ShiftType.LSL, 0);
 540         }
 541     }
 542 
 543     /**
 544      * dst = src1 - src2 and sets condition flags.
 545      *
 546      * @param size register size. Has to be 32 or 64.
 547      * @param dst general purpose register. May not be null.
 548      * @param src1 general purpose register. May not be null.
 549      * @param src2 general purpose register. May not be null or stackpointer.
 550      */
 551     public void subs(int size, Register dst, Register src1, Register src2) {
 552         if (dst.equals(sp) || src1.equals(sp)) {
 553             super.subs(size, dst, src1, src2, ExtendType.UXTX, 0);
 554         } else {
 555             super.subs(size, dst, src1, src2, ShiftType.LSL, 0);
 556         }
 557     }
 558 
 559     /**
 560      * dst = src1 - src2.
 561      *
 562      * @param size register size. Has to be 32 or 64.
 563      * @param dst general purpose register. May not be null.
 564      * @param src1 general purpose register. May not be null.
 565      * @param src2 general purpose register. May not be null or stackpointer.
 566      */
 567     public void sub(int size, Register dst, Register src1, Register src2) {
 568         if (dst.equals(sp) || src1.equals(sp)) {
 569             super.sub(size, dst, src1, src2, ExtendType.UXTX, 0);
 570         } else {
 571             super.sub(size, dst, src1, src2, ShiftType.LSL, 0);
 572         }
 573     }
 574 
 575     /**
 576      * dst = src1 + shiftType(src2, shiftAmt & (size - 1)).
 577      *
 578      * @param size register size. Has to be 32 or 64.
 579      * @param dst general purpose register. May not be null or stackpointer.
 580      * @param src1 general purpose register. May not be null or stackpointer.
 581      * @param src2 general purpose register. May not be null or stackpointer.
 582      * @param shiftType any type but ROR.
 583      * @param shiftAmt arbitrary shift amount.
 584      */
 585     @Override
 586     public void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 587         int shift = clampShiftAmt(size, shiftAmt);
 588         super.add(size, dst, src1, src2, shiftType, shift);
 589     }
 590 
 591     /**
     * dst = src1 - shiftType(src2, shiftAmt & (size - 1)).
 593      *
 594      * @param size register size. Has to be 32 or 64.
 595      * @param dst general purpose register. May not be null or stackpointer.
 596      * @param src1 general purpose register. May not be null or stackpointer.
 597      * @param src2 general purpose register. May not be null or stackpointer.
 598      * @param shiftType any type but ROR.
 599      * @param shiftAmt arbitrary shift amount.
 600      */
 601     @Override
 602     public void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
 603         int shift = clampShiftAmt(size, shiftAmt);
 604         super.sub(size, dst, src1, src2, shiftType, shift);
 605     }
 606 
 607     /**
     * dst = -src.
 609      *
 610      * @param size register size. Has to be 32 or 64.
 611      * @param dst general purpose register. May not be null or stackpointer.
 612      * @param src general purpose register. May not be null or stackpointer.
 613      */
 614     public void neg(int size, Register dst, Register src) {
 615         sub(size, dst, zr, src);
 616     }
 617 
 618     /**
 619      * dst = src + immediate.
 620      *
 621      * @param size register size. Has to be 32 or 64.
 622      * @param dst general purpose register. May not be null or zero-register.
 623      * @param src general purpose register. May not be null or zero-register.
 624      * @param immediate 32-bit signed int
 625      */
 626     @Override
 627     public void add(int size, Register dst, Register src, int immediate) {
 628         assert (!dst.equals(zr) && !src.equals(zr));
 629         if (immediate < 0) {
 630             sub(size, dst, src, -immediate);
 631         } else if (isAimm(immediate)) {
 632             if (!(dst.equals(src) && immediate == 0)) {
 633                 super.add(size, dst, src, immediate);
 634             }
 635         } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) {
 636             super.add(size, dst, src, immediate & -(1 << 12));
 637             super.add(size, dst, dst, immediate & ((1 << 12) - 1));
 638         } else {
 639             assert !dst.equals(src);
 640             mov(dst, immediate);
            add(size, dst, src, dst);
 642         }
 643     }
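
    // Worked example for the two-instruction path above (illustrative): add(64, dst, src, 0x123456)
    // is not a single arithmetic immediate, but fits into 24 bits, so it is split into a shifted and
    // an unshifted 12-bit part:
    //   add dst, src, #0x123000
    //   add dst, dst, #0x456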
 644 
 645     /**
 646      * dst = src + immediate.
 647      *
 648      * @param size register size. Has to be 32 or 64.
 649      * @param dst general purpose register. May not be null or zero-register.
 650      * @param src general purpose register. May not be null or zero-register.
 651      * @param immediate 64-bit signed int
 652      */
 653     public void add(int size, Register dst, Register src, long immediate) {
 654         if (NumUtil.isInt(immediate)) {
 655             add(size, dst, src, (int) immediate);
 656         } else {
 657             assert (!dst.equals(zr) && !src.equals(zr));
 658             assert !dst.equals(src);
 659             assert size == 64;
 660             mov(dst, immediate);
            add(size, dst, src, dst);
 662         }
 663     }
 664 
 665     /**
 666      * dst = src + aimm and sets condition flags.
 667      *
 668      * @param size register size. Has to be 32 or 64.
 669      * @param dst general purpose register. May not be null or stackpointer.
 670      * @param src general purpose register. May not be null or zero-register.
 671      * @param immediate arithmetic immediate.
 672      */
 673     @Override
 674     public void adds(int size, Register dst, Register src, int immediate) {
 675         assert (!dst.equals(sp) && !src.equals(zr));
 676         if (immediate < 0) {
 677             subs(size, dst, src, -immediate);
 678         } else if (!(dst.equals(src) && immediate == 0)) {
 679             super.adds(size, dst, src, immediate);
 680         }
 681     }
 682 
 683     /**
 684      * dst = src - immediate.
 685      *
 686      * @param size register size. Has to be 32 or 64.
 687      * @param dst general purpose register. May not be null or zero-register.
 688      * @param src general purpose register. May not be null or zero-register.
 689      * @param immediate 32-bit signed int
 690      */
 691     @Override
 692     public void sub(int size, Register dst, Register src, int immediate) {
 693         assert (!dst.equals(zr) && !src.equals(zr));
 694         if (immediate < 0) {
 695             add(size, dst, src, -immediate);
 696         } else if (isAimm(immediate)) {
 697             if (!(dst.equals(src) && immediate == 0)) {
 698                 super.sub(size, dst, src, immediate);
 699             }
 700         } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) {
 701             super.sub(size, dst, src, immediate & -(1 << 12));
 702             super.sub(size, dst, dst, immediate & ((1 << 12) - 1));
 703         } else {
 704             assert !dst.equals(src);
 705             mov(dst, immediate);
            sub(size, dst, src, dst);
 707         }
 708     }
 709 
 710     /**
 711      * dst = src - aimm and sets condition flags.
 712      *
 713      * @param size register size. Has to be 32 or 64.
 714      * @param dst general purpose register. May not be null or stackpointer.
 715      * @param src general purpose register. May not be null or zero-register.
 716      * @param immediate arithmetic immediate.
 717      */
 718     @Override
 719     public void subs(int size, Register dst, Register src, int immediate) {
 720         assert (!dst.equals(sp) && !src.equals(zr));
 721         if (immediate < 0) {
 722             adds(size, dst, src, -immediate);
 723         } else if (!dst.equals(src) || immediate != 0) {
 724             super.subs(size, dst, src, immediate);
 725         }
 726     }
 727 
 728     /**
 729      * dst = src1 * src2.
 730      *
 731      * @param size register size. Has to be 32 or 64.
 732      * @param dst general purpose register. May not be null or the stackpointer.
 733      * @param src1 general purpose register. May not be null or the stackpointer.
 734      * @param src2 general purpose register. May not be null or the stackpointer.
 735      */
 736     public void mul(int size, Register dst, Register src1, Register src2) {
 737         super.madd(size, dst, src1, src2, zr);
 738     }
 739 
 740     /**
 741      * unsigned multiply high. dst = (src1 * src2) >> size
 742      *
 743      * @param size register size. Has to be 32 or 64.
 744      * @param dst general purpose register. May not be null or the stackpointer.
 745      * @param src1 general purpose register. May not be null or the stackpointer.
 746      * @param src2 general purpose register. May not be null or the stackpointer.
 747      */
 748     public void umulh(int size, Register dst, Register src1, Register src2) {
 749         assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp));
 750         assert size == 32 || size == 64;
 751         if (size == 64) {
 752             super.umulh(dst, src1, src2);
 753         } else {
 754             // xDst = wSrc1 * wSrc2
 755             super.umaddl(dst, src1, src2, zr);
 756             // xDst = xDst >> 32
 757             lshr(64, dst, dst, 32);
 758         }
 759     }
 760 
 761     /**
 762      * signed multiply high. dst = (src1 * src2) >> size
 763      *
 764      * @param size register size. Has to be 32 or 64.
 765      * @param dst general purpose register. May not be null or the stackpointer.
 766      * @param src1 general purpose register. May not be null or the stackpointer.
 767      * @param src2 general purpose register. May not be null or the stackpointer.
 768      */
 769     public void smulh(int size, Register dst, Register src1, Register src2) {
 770         assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp));
 771         assert size == 32 || size == 64;
 772         if (size == 64) {
 773             super.smulh(dst, src1, src2);
 774         } else {
 775             // xDst = wSrc1 * wSrc2
 776             super.smaddl(dst, src1, src2, zr);
 777             // xDst = xDst >> 32
 778             lshr(64, dst, dst, 32);
 779         }
 780     }
 781 
 782     /**
 783      * dst = src1 % src2. Signed.
 784      *
 785      * @param size register size. Has to be 32 or 64.
 786      * @param dst general purpose register. May not be null or the stackpointer.
 787      * @param n numerator. General purpose register. May not be null or the stackpointer.
     * @param d denominator. General purpose register. May not be null or the stackpointer.
 789      */
 790     public void rem(int size, Register dst, Register n, Register d) {
 791         assert (!dst.equals(sp) && !n.equals(sp) && !d.equals(sp));
 792         // There is no irem or similar instruction. Instead we use the relation:
        // n % d = n - Floor(n / d) * d if n * d >= 0
        // n % d = n - Ceil(n / d) * d otherwise
        // Both are equivalent to n - TruncatingDivision(n, d) * d.
 796         super.sdiv(size, dst, n, d);
 797         super.msub(size, dst, dst, d, n);
 798     }
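
    // Numeric example (added for clarity): for n = -7 and d = 3 the sdiv above yields -2 (truncating
    // division), and msub then computes dst = n - (-2) * 3 = -1, matching Java's remainder semantics.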
 799 
 800     /**
 801      * dst = src1 % src2. Unsigned.
 802      *
 803      * @param size register size. Has to be 32 or 64.
 804      * @param dst general purpose register. May not be null or the stackpointer.
 805      * @param n numerator. General purpose register. May not be null or the stackpointer.
     * @param d denominator. General purpose register. May not be null or the stackpointer.
 807      */
 808     public void urem(int size, Register dst, Register n, Register d) {
 809         // There is no irem or similar instruction. Instead we use the relation:
 810         // n % d = n - Floor(n / d) * d
 811         // Which is equivalent to n - TruncatingDivision(n, d) * d
 812         super.udiv(size, dst, n, d);
 813         super.msub(size, dst, dst, d, n);
 814     }
 815 
 816     /**
 817      * Add/subtract instruction encoding supports 12-bit immediate values.
 818      *
 819      * @param imm immediate value to be tested.
 820      * @return true if immediate can be used directly for arithmetic instructions (add/sub), false
 821      *         otherwise.
 822      */
 823     public static boolean isArithmeticImmediate(long imm) {
        // If we have a negative immediate we just use the opposite operator, i.e.: x - (-5) == x + 5.
 826         return NumUtil.isInt(Math.abs(imm)) && isAimm((int) Math.abs(imm));
 827     }
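
    // A few illustrative values (not exhaustive, assuming isAimm accepts the architectural 12-bit
    // immediate optionally shifted left by 12): 0xfff and 0x1000 (= 0x1 << 12) are arithmetic
    // immediates, while 0x1001 is not, because it would need bits in both the shifted and the
    // unshifted 12-bit field.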
 828 
 829     /**
 830      * Compare instructions are add/subtract instructions and so support 12-bit immediate values.
 831      *
 832      * @param imm immediate value to be tested.
 833      * @return true if immediate can be used directly with comparison instructions, false otherwise.
 834      */
 835     public static boolean isComparisonImmediate(long imm) {
 836         return isArithmeticImmediate(imm);
 837     }
 838 
 839     /**
 840      * Move wide immediate instruction encoding supports 16-bit immediate values which can be
     * optionally shifted by multiples of 16 (i.e. 0, 16, 32, 48).
 842      *
 843      * @return true if immediate can be moved directly into a register, false otherwise.
 844      */
 845     public static boolean isMovableImmediate(long imm) {
 846         // // Positions of first, respectively last set bit.
 847         // int start = Long.numberOfTrailingZeros(imm);
 848         // int end = 64 - Long.numberOfLeadingZeros(imm);
 849         // int length = end - start;
 850         // if (length > 16) {
 851         // return false;
 852         // }
 853         // // We can shift the necessary part of the immediate (i.e. everything between the first
 854         // and
 855         // // last set bit) by as much as 16 - length around to arrive at a valid shift amount
 856         // int tolerance = 16 - length;
 857         // int prevMultiple = NumUtil.roundDown(start, 16);
 858         // int nextMultiple = NumUtil.roundUp(start, 16);
 859         // return start - prevMultiple <= tolerance || nextMultiple - start <= tolerance;
 860         /*
 861          * This is a bit optimistic because the constant could also be for an arithmetic instruction
 862          * which only supports 12-bits. That case needs to be handled in the backend.
 863          */
 864         return NumUtil.isInt(Math.abs(imm)) && NumUtil.isUnsignedNbit(16, (int) Math.abs(imm));
 865     }
 866 
 867     /**
 868      * dst = src << (shiftAmt & (size - 1)).
 869      *
 870      * @param size register size. Has to be 32 or 64.
 871      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 872      * @param src general purpose register. May not be null, stackpointer or zero-register.
 873      * @param shiftAmt amount by which src is shifted.
 874      */
 875     public void shl(int size, Register dst, Register src, long shiftAmt) {
 876         int shift = clampShiftAmt(size, shiftAmt);
 877         super.ubfm(size, dst, src, (size - shift) & (size - 1), size - 1 - shift);
 878     }
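
    // Mapping example (illustrative): shl(64, dst, src, 8) becomes ubfm(64, dst, src, 56, 55),
    // i.e. the canonical UBFM encoding of "lsl dst, src, #8" (immr = 64 - 8, imms = 63 - 8).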
 879 
 880     /**
     * dst = src << (shift & (size - 1)).
 882      *
 883      * @param size register size. Has to be 32 or 64.
 884      * @param dst general purpose register. May not be null or stackpointer.
 885      * @param src general purpose register. May not be null or stackpointer.
 886      * @param shift general purpose register. May not be null or stackpointer.
 887      */
 888     public void shl(int size, Register dst, Register src, Register shift) {
 889         super.lsl(size, dst, src, shift);
 890     }
 891 
 892     /**
 893      * dst = src >>> (shiftAmt & (size - 1)).
 894      *
 895      * @param size register size. Has to be 32 or 64.
 896      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 897      * @param src general purpose register. May not be null, stackpointer or zero-register.
 898      * @param shiftAmt amount by which src is shifted.
 899      */
 900     public void lshr(int size, Register dst, Register src, long shiftAmt) {
 901         int shift = clampShiftAmt(size, shiftAmt);
 902         super.ubfm(size, dst, src, shift, size - 1);
 903     }
 904 
 905     /**
     * dst = src >>> (shift & (size - 1)).
 907      *
 908      * @param size register size. Has to be 32 or 64.
 909      * @param dst general purpose register. May not be null or stackpointer.
 910      * @param src general purpose register. May not be null or stackpointer.
 911      * @param shift general purpose register. May not be null or stackpointer.
 912      */
 913     public void lshr(int size, Register dst, Register src, Register shift) {
 914         super.lsr(size, dst, src, shift);
 915     }
 916 
 917     /**
     * dst = src >> (shiftAmt & (size - 1)).
 919      *
 920      * @param size register size. Has to be 32 or 64.
 921      * @param dst general purpose register. May not be null, stackpointer or zero-register.
 922      * @param src general purpose register. May not be null, stackpointer or zero-register.
 923      * @param shiftAmt amount by which src is shifted.
 924      */
 925     public void ashr(int size, Register dst, Register src, long shiftAmt) {
 926         int shift = clampShiftAmt(size, shiftAmt);
 927         super.sbfm(size, dst, src, shift, size - 1);
 928     }
 929 
 930     /**
     * dst = src >> (shift & (size - 1)).
 932      *
 933      * @param size register size. Has to be 32 or 64.
 934      * @param dst general purpose register. May not be null or stackpointer.
 935      * @param src general purpose register. May not be null or stackpointer.
 936      * @param shift general purpose register. May not be null or stackpointer.
 937      */
 938     public void ashr(int size, Register dst, Register src, Register shift) {
 939         super.asr(size, dst, src, shift);
 940     }
 941 
 942     /**
     * Clamps shiftAmt into the range 0 <= shiftAmt < size according to the JLS.
 944      *
 945      * @param size size of operation.
 946      * @param shiftAmt arbitrary shift amount.
 947      * @return value between 0 and size - 1 inclusive that is equivalent to shiftAmt according to
 948      *         JLS.
 949      */
 950     private static int clampShiftAmt(int size, long shiftAmt) {
 951         return (int) (shiftAmt & (size - 1));
 952     }
 953 
 954     /**
 955      * dst = src1 & src2.
 956      *
 957      * @param size register size. Has to be 32 or 64.
 958      * @param dst general purpose register. May not be null or stackpointer.
 959      * @param src1 general purpose register. May not be null or stackpointer.
 960      * @param src2 general purpose register. May not be null or stackpointer.
 961      */
 962     public void and(int size, Register dst, Register src1, Register src2) {
 963         super.and(size, dst, src1, src2, ShiftType.LSL, 0);
 964     }
 965 
 966     /**
 967      * dst = src1 ^ src2.
 968      *
 969      * @param size register size. Has to be 32 or 64.
 970      * @param dst general purpose register. May not be null or stackpointer.
 971      * @param src1 general purpose register. May not be null or stackpointer.
 972      * @param src2 general purpose register. May not be null or stackpointer.
 973      */
 974     public void eor(int size, Register dst, Register src1, Register src2) {
 975         super.eor(size, dst, src1, src2, ShiftType.LSL, 0);
 976     }
 977 
 978     /**
 979      * dst = src1 | src2.
 980      *
 981      * @param size register size. Has to be 32 or 64.
 982      * @param dst general purpose register. May not be null or stackpointer.
 983      * @param src1 general purpose register. May not be null or stackpointer.
 984      * @param src2 general purpose register. May not be null or stackpointer.
 985      */
 986     public void or(int size, Register dst, Register src1, Register src2) {
 987         super.orr(size, dst, src1, src2, ShiftType.LSL, 0);
 988     }
 989 
 990     /**
 991      * dst = src | bimm.
 992      *
 993      * @param size register size. Has to be 32 or 64.
 994      * @param dst general purpose register. May not be null or zero-register.
 995      * @param src general purpose register. May not be null or stack-pointer.
 996      * @param bimm logical immediate. See {@link AArch64Assembler.LogicalImmediateTable} for exact
 997      *            definition.
 998      */
 999     public void or(int size, Register dst, Register src, long bimm) {
1000         super.orr(size, dst, src, bimm);
1001     }
1002 
1003     /**
1004      * dst = ~src.
1005      *
1006      * @param size register size. Has to be 32 or 64.
1007      * @param dst general purpose register. May not be null or stackpointer.
1008      * @param src general purpose register. May not be null or stackpointer.
1009      */
1010     public void not(int size, Register dst, Register src) {
1011         super.orn(size, dst, zr, src, ShiftType.LSL, 0);
1012     }
1013 
1014     /**
1015      * Sign-extend value from src into dst.
1016      *
1017      * @param destSize destination register size. Must be 32 or 64.
1018      * @param srcSize source register size. Must be smaller than destSize.
1019      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1020      * @param src general purpose register. May not be null, stackpointer or zero-register.
1021      */
1022     public void sxt(int destSize, int srcSize, Register dst, Register src) {
1023         assert (srcSize < destSize && srcSize > 0);
1024         super.sbfm(destSize, dst, src, 0, srcSize - 1);
1025     }
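
    // Example (for documentation only): sxt(64, 32, dst, src) emits sbfm(64, dst, src, 0, 31),
    // which is the sxtw alias, sign-extending the low word of src into all 64 bits of dst.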
1026 
1027     /**
     * dst = -src if condition else src.
1029      *
1030      * @param size register size. Must be 32 or 64.
1031      * @param dst general purpose register. May not be null or the stackpointer.
1032      * @param src general purpose register. May not be null or the stackpointer.
1033      * @param condition any condition except AV or NV. May not be null.
1034      */
1035     public void csneg(int size, Register dst, Register src, ConditionFlag condition) {
1036         super.csneg(size, dst, src, src, condition.negate());
1037     }
1038 
1039     /**
1040      * @return True if the immediate can be used directly for logical 64-bit instructions.
1041      */
1042     public static boolean isLogicalImmediate(long imm) {
1043         return LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO;
1044     }
1045 
1046     /**
1047      * @return True if the immediate can be used directly for logical 32-bit instructions.
1048      */
1049     public static boolean isLogicalImmediate(int imm) {
1050         return LogicalImmediateTable.isRepresentable(imm) == LogicalImmediateTable.Representable.YES;
1051     }
1052 
1053     /* Float instructions */
1054 
1055     /**
1056      * Moves integer to float, float to integer, or float to float. Does not support integer to
1057      * integer moves.
1058      *
1059      * @param size register size. Has to be 32 or 64.
     * @param dst either a floating-point or a general-purpose register. If it is a general-purpose
     *            register it may not be the stackpointer or the zero register. May not be null.
     * @param src either a floating-point or a general-purpose register. If it is a general-purpose
     *            register it may not be the stackpointer. May not be null.
1064      */
1065     @Override
1066     public void fmov(int size, Register dst, Register src) {
1067         assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU)) : "src and dst cannot both be integer registers.";
1068         if (dst.getRegisterCategory().equals(CPU)) {
1069             super.fmovFpu2Cpu(size, dst, src);
1070         } else if (src.getRegisterCategory().equals(CPU)) {
1071             super.fmovCpu2Fpu(size, dst, src);
1072         } else {
1073             super.fmov(size, dst, src);
1074         }
1075     }
1076 
1077     /**
1078      *
1079      * @param size register size. Has to be 32 or 64.
1080      * @param dst floating point register. May not be null.
     * @param imm immediate that is loaded into dst. If size is 32, only float immediates can be
     *            loaded, i.e. (float) imm == imm must be true. In all cases {@code isFloatImmediate}
     *            (for size 32), respectively {@code isDoubleImmediate} (for size 64), must be true.
1085      */
1086     @Override
1087     public void fmov(int size, Register dst, double imm) {
1088         if (imm == 0.0) {
            assert Double.doubleToRawLongBits(imm) == 0L : "-0.0 is not a valid immediate.";
1090             super.fmovCpu2Fpu(size, dst, zr);
1091         } else {
1092             super.fmov(size, dst, imm);
1093         }
1094     }
1095 
1096     /**
1097      *
1098      * @return true if immediate can be loaded directly into floating-point register, false
1099      *         otherwise.
1100      */
1101     public static boolean isDoubleImmediate(double imm) {
1102         return Double.doubleToRawLongBits(imm) == 0L || AArch64Assembler.isDoubleImmediate(imm);
1103     }
1104 
1105     /**
1106      *
1107      * @return true if immediate can be loaded directly into floating-point register, false
1108      *         otherwise.
1109      */
1110     public static boolean isFloatImmediate(float imm) {
1111         return Float.floatToRawIntBits(imm) == 0 || AArch64Assembler.isFloatImmediate(imm);
1112     }
1113 
1114     /**
     * Conditional move. result = trueValue if condition else falseValue.
1116      *
1117      * @param size register size.
1118      * @param result floating point register. May not be null.
1119      * @param trueValue floating point register. May not be null.
1120      * @param falseValue floating point register. May not be null.
1121      * @param condition every condition allowed. May not be null.
1122      */
1123     public void fcmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag condition) {
1124         super.fcsel(size, result, trueValue, falseValue, condition);
1125     }
1126 
1127     /**
1128      * dst = src1 % src2.
1129      *
1130      * @param size register size. Has to be 32 or 64.
1131      * @param dst floating-point register. May not be null.
1132      * @param n numerator. Floating-point register. May not be null.
1133      * @param d denominator. Floating-point register. May not be null.
1134      */
1135     public void frem(int size, Register dst, Register n, Register d) {
1136         // There is no frem instruction, instead we compute the remainder using the relation:
1137         // rem = n - Truncating(n / d) * d
1138         super.fdiv(size, dst, n, d);
1139         super.frintz(size, dst, dst);
1140         super.fmsub(size, dst, dst, d, n);
1141     }
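
    // Numeric example (added for clarity): for n = 5.5 and d = 2.0 the sequence computes
    // fdiv -> 2.75, frintz -> 2.0, fmsub -> 5.5 - 2.0 * 2.0 = 1.5, i.e. the truncated remainder.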
1142 
1143     /* Branches */
1144 
1145     /**
1146      * Compares x and y and sets condition flags.
1147      *
1148      * @param size register size. Has to be 32 or 64.
1149      * @param x general purpose register. May not be null or stackpointer.
1150      * @param y general purpose register. May not be null or stackpointer.
1151      */
1152     public void cmp(int size, Register x, Register y) {
1153         assert size == 32 || size == 64;
1154         super.subs(size, zr, x, y, ShiftType.LSL, 0);
1155     }
1156 
1157     /**
1158      * Compares x to y and sets condition flags.
1159      *
1160      * @param size register size. Has to be 32 or 64.
1161      * @param x general purpose register. May not be null or stackpointer.
1162      * @param y comparison immediate, {@link #isComparisonImmediate(long)} has to be true for it.
1163      */
1164     public void cmp(int size, Register x, int y) {
1165         assert size == 32 || size == 64;
1166         if (y < 0) {
1167             super.adds(size, zr, x, -y);
1168         } else {
1169             super.subs(size, zr, x, y);
1170         }
1171     }
1172 
1173     /**
1174      * Sets condition flags according to result of x & y.
1175      *
1176      * @param size register size. Has to be 32 or 64.
1177      * @param dst general purpose register. May not be null or stack-pointer.
1178      * @param x general purpose register. May not be null or stackpointer.
1179      * @param y general purpose register. May not be null or stackpointer.
1180      */
1181     public void ands(int size, Register dst, Register x, Register y) {
1182         super.ands(size, dst, x, y, ShiftType.LSL, 0);
1183     }
1184 
1185     /**
1186      * Sets overflow flag according to result of x * y.
1187      *
1188      * @param size register size. Has to be 32 or 64.
1189      * @param dst general purpose register. May not be null or stack-pointer.
1190      * @param x general purpose register. May not be null or stackpointer.
1191      * @param y general purpose register. May not be null or stackpointer.
1192      */
1193     public void mulvs(int size, Register dst, Register x, Register y) {
1194         try (ScratchRegister sc1 = getScratchRegister();
1195                         ScratchRegister sc2 = getScratchRegister()) {
1196             switch (size) {
1197                 case 64: {
1198                     // Be careful with registers: it's possible that x, y, and dst are the same
1199                     // register.
1200                     Register rscratch1 = sc1.getRegister();
1201                     Register rscratch2 = sc2.getRegister();
1202                     mul(64, rscratch1, x, y);     // Result bits 0..63
1203                     smulh(64, rscratch2, x, y);  // Result bits 64..127
1204                     // Top is pure sign ext
1205                     subs(64, zr, rscratch2, rscratch1, ShiftType.ASR, 63);
1206                     // Copy all 64 bits of the result into dst
1207                     mov(64, dst, rscratch1);
1208                     mov(rscratch1, 0x80000000);
1209                     // Develop 0 (EQ), or 0x80000000 (NE)
1210                     cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE);
1211                     cmp(32, rscratch1, 1);
1212                     // 0x80000000 - 1 => VS
1213                     break;
1214                 }
1215                 case 32: {
1216                     Register rscratch1 = sc1.getRegister();
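                          // smaddl (with zr as addend) yields the exact 64-bit signed product of the 32-bit
                          // operands; overflow occurred iff that product differs from the sign extension of
                          // its own low 32 bits, which the subs below tests.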
1217                     smaddl(rscratch1, x, y, zr);
1218                     // Copy the low 32 bits of the result into dst
1219                     mov(32, dst, rscratch1);
1220                     subs(64, zr, rscratch1, rscratch1, ExtendType.SXTW, 0);
1221                     // NE => overflow
1222                     mov(rscratch1, 0x80000000);
1223                     // Develop 0 (EQ), or 0x80000000 (NE)
1224                     cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE);
1225                     cmp(32, rscratch1, 1);
1226                     // 0x80000000 - 1 => VS
1227                     break;
1228                 }
1229             }
1230         }
1231     }
1232 
1233     /**
1234      * When patching up Labels we have to know what kind of code to generate.
1235      */
1236     public enum PatchLabelKind {
1237         BRANCH_CONDITIONALLY(0x0),
1238         BRANCH_UNCONDITIONALLY(0x1),
1239         BRANCH_NONZERO(0x2),
1240         BRANCH_ZERO(0x3),
1241         BRANCH_BIT_NONZERO(0x4),
1242         BRANCH_BIT_ZERO(0x5),
1243         JUMP_ADDRESS(0x6),
1244         ADR(0x7);
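
              /*
               * Layout of the placeholder word emitted at an unbound patch site (see the emit and patch code
               * below): the low INFORMATION_OFFSET bits hold the PatchLabelKind encoding and the bits above
               * hold the kind-specific data, i.e. the condition flag for BRANCH_CONDITIONALLY, the operand
               * size bit and register for BRANCH_NONZERO/BRANCH_ZERO, the bit index and register for the
               * BRANCH_BIT_* kinds, the destination register for ADR, and an offset for JUMP_ADDRESS.
               */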
1245 
1246         /**
1247          * Offset (in bits) by which the kind-specific additional information (condition flag, register,
1248          * operand size or bit index) is shifted within the patch placeholder.
1249          */
1250         public static final int INFORMATION_OFFSET = 5;
1251 
1252         public final int encoding;
1253 
1254         PatchLabelKind(int encoding) {
1255             this.encoding = encoding;
1256         }
1257 
1258         /**
1259          * @return PatchLabelKind with given encoding.
1260          */
1261         private static PatchLabelKind fromEncoding(int encoding) {
1262             return values()[encoding & NumUtil.getNbitNumberInt(INFORMATION_OFFSET)];
1263         }
1264 
1265     }
1266 
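         /**
          * Loads the address of {@code label} into {@code dst} with a single pc-relative adr instruction
          * (+/-1 MB range).
          *
          * @param dst general purpose register. May not be null.
          * @param label May be unbound. Non null.
          */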
1267     public void adr(Register dst, Label label) {
1268         // TODO Handle case where offset is too large for a single jump instruction
1269         if (label.isBound()) {
1270             int offset = label.position() - position();
1271             super.adr(dst, offset);
1272         } else {
1273             label.addPatchAt(position());
1274             // Encode the destination register so that we know how to patch the instruction later
1275             emitInt(PatchLabelKind.ADR.encoding | dst.encoding << PatchLabelKind.INFORMATION_OFFSET);
1276         }
1277     }
1278 
1279     /**
1280      * Compare register and branch if non-zero.
1281      *
1282      * @param size register size. Has to be 32 or 64.
1283      * @param cmp general purpose register. May not be null, zero-register or stack-pointer.
1284      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1285      */
1286     public void cbnz(int size, Register cmp, Label label) {
1287         // TODO Handle case where offset is too large for a single jump instruction
1288         if (label.isBound()) {
1289             int offset = label.position() - position();
1290             super.cbnz(size, cmp, offset);
1291         } else {
1292             label.addPatchAt(position());
1293             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1294             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1295             // Encode the register and operand size so that we know how to patch the instruction later
1296             emitInt(PatchLabelKind.BRANCH_NONZERO.encoding | regEncoding | sizeEncoding);
1297         }
1298     }
1299 
1300     /**
1301      * Compare register and branch if zero.
1302      *
1303      * @param size register size. Has to be 32 or 64.
1304      * @param cmp general purpose register. May not be null, zero-register or stack-pointer.
1305      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1306      */
1307     public void cbz(int size, Register cmp, Label label) {
1308         // TODO Handle case where offset is too large for a single jump instruction
1309         if (label.isBound()) {
1310             int offset = label.position() - position();
1311             super.cbz(size, cmp, offset);
1312         } else {
1313             label.addPatchAt(position());
1314             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1);
1315             int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET;
1316             // Encode the register and operand size so that we know how to patch the instruction later
1317             emitInt(PatchLabelKind.BRANCH_ZERO.encoding | regEncoding | sizeEncoding);
1318         }
1319     }
1320 
1321     /**
1322      * Test a single bit and branch if the bit is nonzero.
1323      *
1324      * @param cmp general purpose register. May not be null, zero-register or stack-pointer.
1325      * @param uimm6 Unsigned 6-bit bit index.
1326      * @param label Can only handle 16-bit word-aligned offsets for now. May be unbound. Non null.
1327      */
1328     public void tbnz(Register cmp, int uimm6, Label label) {
1329         assert NumUtil.isUnsignedNbit(6, uimm6);
1330         if (label.isBound()) {
1331             int offset = label.position() - position();
1332             super.tbnz(cmp, uimm6, offset);
1333         } else {
1334             label.addPatchAt(position());
1335             int indexEncoding = uimm6 << PatchLabelKind.INFORMATION_OFFSET;
1336             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 6);
1337             emitInt(PatchLabelKind.BRANCH_BIT_NONZERO.encoding | indexEncoding | regEncoding);
1338         }
1339     }
1340 
1341     /**
1342      * Test a single bit and branch if the bit is zero.
1343      *
1344      * @param cmp general purpose register. May not be null, zero-register or stack-pointer.
1345      * @param uimm6 Unsigned 6-bit bit index.
1346      * @param label Can only handle 16-bit word-aligned offsets for now. May be unbound. Non null.
1347      */
1348     public void tbz(Register cmp, int uimm6, Label label) {
1349         assert NumUtil.isUnsignedNbit(6, uimm6);
1350         if (label.isBound()) {
1351             int offset = label.position() - position();
1352             super.tbz(cmp, uimm6, offset);
1353         } else {
1354             label.addPatchAt(position());
1355             int indexEncoding = uimm6 << PatchLabelKind.INFORMATION_OFFSET;
1356             int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 6);
1357             emitInt(PatchLabelKind.BRANCH_BIT_ZERO.encoding | indexEncoding | regEncoding);
1358         }
1359     }
1360 
1361     /**
1362      * Branches to label if condition is true.
1363      *
1364      * @param condition any condition value allowed. Non null.
1365      * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null.
1366      */
1367     public void branchConditionally(ConditionFlag condition, Label label) {
1368         // TODO Handle case where offset is too large for a single jump instruction
1369         if (label.isBound()) {
1370             int offset = label.position() - position();
1371             super.b(condition, offset);
1372         } else {
1373             label.addPatchAt(position());
1374             // Encode condition flag so that we know how to patch the instruction later
1375             emitInt(PatchLabelKind.BRANCH_CONDITIONALLY.encoding | condition.encoding << PatchLabelKind.INFORMATION_OFFSET);
1376         }
1377     }
1378 
1379     /**
1380      * Branches if condition is true. The jump address is patched up by HotSpot C++ code.
1381      *
1382      * @param condition any condition value allowed. Non null.
1383      */
1384     public void branchConditionally(ConditionFlag condition) {
1385         // Correct offset is fixed up by HotSpot later.
1386         super.b(condition, 0);
1387     }
1388 
1389     /**
1390      * Jumps to label.
1391      *
1392      * @param label Can only handle signed 28-bit offsets. May be unbound. Non null.
1393      */
1394     @Override
1395     public void jmp(Label label) {
1396         // TODO Handle case where offset is too large for a single jump instruction
1397         if (label.isBound()) {
1398             int offset = label.position() - position();
1399             super.b(offset);
1400         } else {
1401             label.addPatchAt(position());
1402             emitInt(PatchLabelKind.BRANCH_UNCONDITIONALLY.encoding);
1403         }
1404     }
1405 
1406     /**
1407      * Jump to address in dest.
1408      *
1409      * @param dest general purpose register. May not be null, zero-register or stack-pointer.
1410      */
1411     public void jmp(Register dest) {
1412         super.br(dest);
1413     }
1414 
1415     /**
1416      * Immediate jump instruction fixed up by HotSpot C++ code.
1417      */
1418     public void jmp() {
1419         // Offset has to be fixed up by C++ code.
1420         super.b(0);
1421     }
1422 
1423     /**
1424      * @param imm signed branch offset in bytes.
1425      * @return true if the immediate offset can be used in a single branch instruction.
1426      */
1427     public static boolean isBranchImmediateOffset(long imm) {
1428         return NumUtil.isSignedNbit(28, imm);
1429     }
1430 
1431     /* system instructions */
1432 
1433     /**
1434      * Exception codes used when calling hlt instruction.
1435      */
1436     public enum AArch64ExceptionCode {
1437         NO_SWITCH_TARGET(0x0),
1438         BREAKPOINT(0x1);
1439 
1440         public final int encoding;
1441 
1442         AArch64ExceptionCode(int encoding) {
1443             this.encoding = encoding;
1444         }
1445     }
1446 
1447     /**
1448      * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
1449      * UNALLOCATED instruction.
1450      *
1451      * @param exceptionCode exception code specifying why halt was called. Non null.
1452      */
1453     public void hlt(AArch64ExceptionCode exceptionCode) {
1454         super.hlt(exceptionCode.encoding);
1455     }
1456 
1457     /**
1458      * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
1459      * exception level.
1460      *
1461      * @param exceptionCode exception code specifying why break was called. Non null.
1462      */
1463     public void brk(AArch64ExceptionCode exceptionCode) {
1464         super.brk(exceptionCode.encoding);
1465     }
1466 
1467     public void pause() {
1468         throw GraalError.unimplemented();
1469     }
1470 
1471     /**
1472      * Executes a no-op instruction. No registers or flags are updated, except for the PC.
1473      */
1474     public void nop() {
1475         super.hint(SystemHint.NOP);
1476     }
1477 
1478     /**
1479      * Consumption of Speculative Data Barrier. This is a memory barrier that controls speculative
1480      * execution and data value prediction.
1481      */
1482     public void csdb() {
1483         super.hint(SystemHint.CSDB);
1484     }
1485 
1486     /**
1487      * Same as {@link #nop()}.
1488      */
1489     @Override
1490     public void ensureUniquePC() {
1491         nop();
1492     }
1493 
1494     /**
1495      * Aligns the current code position to the given modulus by emitting NOP instructions.
1496      *
1497      * @param modulus Has to be a positive multiple of 4.
1498      */
1499     @Override
1500     public void align(int modulus) {
1501         assert modulus > 0 && (modulus & 0x3) == 0 : "Modulus has to be a positive multiple of 4.";
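             // For example, with position() == 4, align(16) emits three NOPs to advance the position to 16.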
1502         if (position() % modulus == 0) {
1503             return;
1504         }
1505         int offset = modulus - position() % modulus;
1506         for (int i = 0; i < offset; i += 4) {
1507             nop();
1508         }
1509     }
1510 
1511     /**
1512      * Patches jump targets when a label gets bound.
1513      */
1514     @Override
1515     protected void patchJumpTarget(int branch, int jumpTarget) {
1516         int instruction = getInt(branch);
1517         int branchOffset = jumpTarget - branch;
1518         PatchLabelKind type = PatchLabelKind.fromEncoding(instruction);
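             // While the label was unbound a placeholder word was emitted at the patch position: its low bits
             // hold the PatchLabelKind and the bits above hold the kind-specific payload (see PatchLabelKind).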
1519         switch (type) {
1520             case BRANCH_CONDITIONALLY:
1521                 ConditionFlag cf = ConditionFlag.fromEncoding(instruction >>> PatchLabelKind.INFORMATION_OFFSET);
1522                 super.b(cf, branchOffset, branch);
1523                 break;
1524             case BRANCH_UNCONDITIONALLY:
1525                 super.b(branchOffset, branch);
1526                 break;
1527             case JUMP_ADDRESS:
1528                 int offset = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1529                 emitInt(jumpTarget - offset, branch);
1530                 break;
1531             case BRANCH_NONZERO:
1532             case BRANCH_ZERO: {
1533                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1534                 int sizeEncoding = information & 1;
1535                 int regEncoding = information >>> 1;
1536                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1537                 // 1 => 64; 0 => 32
1538                 int size = sizeEncoding * 32 + 32;
1539                 switch (type) {
1540                     case BRANCH_NONZERO:
1541                         super.cbnz(size, reg, branchOffset, branch);
1542                         break;
1543                     case BRANCH_ZERO:
1544                         super.cbz(size, reg, branchOffset, branch);
1545                         break;
1546                 }
1547                 break;
1548             }
1549             case BRANCH_BIT_NONZERO:
1550             case BRANCH_BIT_ZERO: {
1551                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1552                 int sizeEncoding = information & NumUtil.getNbitNumberInt(6);
1553                 int regEncoding = information >>> 6;
1554                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1555                 switch (type) {
1556                     case BRANCH_BIT_NONZERO:
1557                         super.tbnz(reg, sizeEncoding, branchOffset, branch);
1558                         break;
1559                     case BRANCH_BIT_ZERO:
1560                         super.tbz(reg, sizeEncoding, branchOffset, branch);
1561                         break;
1562                 }
1563                 break;
1564             }
1565             case ADR: {
1566                 int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET;
1567                 int regEncoding = information;
1568                 Register reg = AArch64.cpuRegisters.get(regEncoding);
1569                 super.adr(reg, branchOffset, branch);
1570                 break;
1571             }
1572             default:
1573                 throw GraalError.shouldNotReachHere();
1574         }
1575     }
1576 
1577     /**
1578      * Generates an address of the form {@code base + displacement}.
1579      *
1580      * The base register is not changed to fulfill this requirement. Fails if the displacement cannot be
1581      * represented directly as an address.
1582      *
1583      * @param base general purpose register. May not be null or the zero register.
1584      * @param displacement arbitrary displacement added to base.
1585      * @return AArch64Address referencing memory at {@code base + displacement}.
1586      */
1587     @Override
1588     public AArch64Address makeAddress(Register base, int displacement) {
1589         return makeAddress(base, displacement, zr, /* signExtend */false, /* transferSize */0, zr, /* allowOverwrite */false);
1590     }
1591 
1592     @Override
1593     public AArch64Address getPlaceholder(int instructionStartPosition) {
1594         return AArch64Address.PLACEHOLDER;
1595     }
1596 
1597     public void addressOf(Register dst) {
1598         // This will be fixed up later.
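             // adrp materializes the 4 KB page of the target address and the add supplies the low 12 bits;
             // both immediate fields are filled in once the final address is known.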
1599         super.adrp(dst);
1600         super.add(64, dst, dst, 0);
1601     }
1602 
1603     /**
1604      * Loads an address into Register d.
1605      *
1606      * @param d general purpose register. May not be null.
1607      * @param a AArch64Address the address of an operand.
1608      */
1609     public void lea(Register d, AArch64Address a) {
1610         a.lea(this, d);
1611     }
1612 }