1 /*
   2  * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.graalvm.compiler.asm.aarch64;
  24 
  25 import static jdk.vm.ci.aarch64.AArch64.cpuRegisters;
  26 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADD;
  27 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADDS;
  28 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADR;
  29 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.AND;
  30 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ANDS;
  31 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ASRV;
  32 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BFM;
  33 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BIC;
  34 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BICS;
  35 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BLR;
  36 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BR;
  37 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BRK;
  38 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLREX;
  39 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLS;
  40 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLZ;
  41 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSEL;
  42 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSINC;
  43 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSNEG;
  44 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.DMB;
  45 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EON;
  46 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EOR;
  47 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EXTR;
  48 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FABS;
  49 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FADD;
  50 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCCMP;
  51 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCMP;
  52 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCMPZERO;
  53 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCSEL;
  54 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTDS;
  55 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTSD;
  56 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTZS;
  57 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FDIV;
  58 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMADD;
  59 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMOV;
  60 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMSUB;
  61 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMUL;
  62 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FNEG;
  63 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FRINTZ;
  64 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FSQRT;
  65 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FSUB;
  66 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.HINT;
  67 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.HLT;
  68 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDAR;
  69 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDAXR;
  70 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDP;
  71 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDR;
  72 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDRS;
  73 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDXR;
  74 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LSLV;
  75 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LSRV;
  76 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MADD;
  77 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVK;
  78 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVN;
  79 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVZ;
  80 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MSUB;
  81 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ORN;
  82 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ORR;
  83 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RBIT;
  84 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RET;
  85 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.REVW;
  86 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.REVX;
  87 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RORV;
  88 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SBFM;
  89 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SCVTF;
  90 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SDIV;
  91 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STLR;
  92 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STLXR;
  93 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STP;
  94 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STR;
  95 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STXR;
  96 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUB;
  97 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUBS;
  98 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBZ;
  99 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBNZ;
 100 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UBFM;
 101 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UDIV;
 102 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP32;
 103 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP64;
 104 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General32;
 105 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General64;
 106 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.floatFromSize;
 107 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.generalFromSize;
 108 import static jdk.vm.ci.aarch64.AArch64.CPU;
 109 import static jdk.vm.ci.aarch64.AArch64.SIMD;
 110 import static jdk.vm.ci.aarch64.AArch64.r0;
 111 import static jdk.vm.ci.aarch64.AArch64.sp;
 112 import static jdk.vm.ci.aarch64.AArch64.zr;
 113 
 114 import java.util.Arrays;
 115 
 116 import org.graalvm.compiler.asm.Assembler;
 117 import org.graalvm.compiler.core.common.NumUtil;
 118 import org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode;
 119 import org.graalvm.compiler.debug.GraalError;
 120 
 121 import jdk.vm.ci.code.Register;
 122 import jdk.vm.ci.code.TargetDescription;
 123 
 124 public abstract class AArch64Assembler extends Assembler {
 125 
 126     public static class LogicalImmediateTable {
 127 
 128         private static final Immediate[] IMMEDIATE_TABLE = buildImmediateTable();
 129 
 130         private static final int ImmediateOffset = 10;
 131         private static final int ImmediateRotateOffset = 16;
 132         private static final int ImmediateSizeOffset = 22;
 133 
 134         /**
 135          * Specifies whether immediate can be represented in all cases (YES), as a 64bit instruction
 136          * (SIXTY_FOUR_BIT_ONLY) or not at all (NO).
 137          */
 138         enum Representable {
 139             YES,
 140             SIXTY_FOUR_BIT_ONLY,
 141             NO
 142         }
 143 
 144         /**
 145          * Tests whether an immediate can be encoded for logical instructions.
 146          *
 147          * @param is64bit if true immediate is considered a 64-bit pattern. If false we may use a
 148          *            64-bit instruction to load the 32-bit pattern into a register.
 149          * @return enum specifying whether immediate can be used for 32- and 64-bit logical
 150          *         instructions ({@code #Representable.YES}), for 64-bit instructions only (
 151          *         {@link Representable#SIXTY_FOUR_BIT_ONLY}) or not at all (
 152          *         {@link Representable#NO}).
 153          */
 154         public static Representable isRepresentable(boolean is64bit, long immediate) {
 155             int pos = getLogicalImmTablePos(is64bit, immediate);
 156             if (pos < 0) {
 157                 // if 32bit instruction we can try again as 64bit immediate which may succeed.
 158                 // i.e. 0xffffffff fails as a 32bit immediate but works as 64bit one.
 159                 if (!is64bit) {
 160                     assert NumUtil.isUnsignedNbit(32, immediate);
 161                     pos = getLogicalImmTablePos(true, immediate);
 162                     return pos >= 0 ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.NO;
 163                 }
 164                 return Representable.NO;
 165             }
 166             Immediate imm = IMMEDIATE_TABLE[pos];
 167             return imm.only64bit() ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.YES;
 168         }
 169 
 170         public static Representable isRepresentable(int immediate) {
 171             return isRepresentable(false, immediate & 0xFFFF_FFFFL);
 172         }
 173 
 174         public static int getLogicalImmEncoding(boolean is64bit, long value) {
 175             int pos = getLogicalImmTablePos(is64bit, value);
 176             assert pos >= 0 : "Value cannot be represented as logical immediate: " + value + ", is64bit=" + is64bit;
 177             Immediate imm = IMMEDIATE_TABLE[pos];
 178             assert is64bit || !imm.only64bit() : "Immediate can only be represented for 64bit, but 32bit instruction specified";
 179             return IMMEDIATE_TABLE[pos].encoding;
 180         }
 181 
 182         /**
 183          * @param is64bit if true also allow 64-bit only encodings to be returned.
 184          * @return If positive the return value is the position into the IMMEDIATE_TABLE for the
 185          *         given immediate, if negative the immediate cannot be encoded.
 186          */
 187         private static int getLogicalImmTablePos(boolean is64bit, long value) {
 188             Immediate imm;
 189             if (!is64bit) {
 190                 // 32bit instructions can only have 32bit immediates.
 191                 if (!NumUtil.isUnsignedNbit(32, value)) {
 192                     return -1;
 193                 }
 194                 // If we have a 32bit instruction (and therefore immediate) we have to duplicate it
 195                 // across 64bit to find it in the table.
 196                 imm = new Immediate(value << 32 | value);
 197             } else {
 198                 imm = new Immediate(value);
 199             }
 200             int pos = Arrays.binarySearch(IMMEDIATE_TABLE, imm);
 201             if (pos < 0) {
 202                 return -1;
 203             }
 204             if (!is64bit && IMMEDIATE_TABLE[pos].only64bit()) {
 205                 return -1;
 206             }
 207             return pos;
 208         }
 209 
 210         /**
 211          * To quote 5.4.2: [..] an immediate is a 32 or 64 bit pattern viewed as a vector of
 212          * identical elements of size e = 2, 4, 8, 16, 32 or (in the case of bimm64) 64 bits. Each
 213          * element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by
 214          * 0 to e-1 bits. It is encoded in the following: 10-16: rotation amount (6bit) starting
 215          * from 1s in the LSB (i.e. 0111->1011->1101->1110) 16-22: This stores a combination of the
 216          * number of set bits and the pattern size. The pattern size is encoded as follows (x is
 217          * used to store the number of 1 bits - 1) e pattern 2 1111xx 4 1110xx 8 110xxx 16 10xxxx 32
 218          * 0xxxxx 64 xxxxxx 22: if set we have an instruction with 64bit pattern?
 219          */
 220         private static final class Immediate implements Comparable<Immediate> {
 221             public final long imm;
 222             public final int encoding;
 223 
 224             Immediate(long imm, boolean is64, int s, int r) {
 225                 this.imm = imm;
 226                 this.encoding = computeEncoding(is64, s, r);
 227             }
 228 
 229             // Used to be able to binary search for an immediate in the table.
 230             Immediate(long imm) {
 231                 this(imm, false, 0, 0);
 232             }
 233 
 234             /**
 235              * Returns true if this pattern is only representable as 64bit.
 236              */
 237             public boolean only64bit() {
 238                 return (encoding & (1 << ImmediateSizeOffset)) != 0;
 239             }
 240 
 241             private static int computeEncoding(boolean is64, int s, int r) {
 242                 int sf = is64 ? 1 : 0;
 243                 return sf << ImmediateSizeOffset | r << ImmediateRotateOffset | s << ImmediateOffset;
 244             }
 245 
 246             @Override
 247             public int compareTo(Immediate o) {
 248                 return Long.compare(imm, o.imm);
 249             }
 250         }
 251 
 252         private static Immediate[] buildImmediateTable() {
 253             final int nrImmediates = 5334;
 254             final Immediate[] table = new Immediate[nrImmediates];
 255             int nrImms = 0;
 256             for (int logE = 1; logE <= 6; logE++) {
 257                 int e = 1 << logE;
 258                 long mask = NumUtil.getNbitNumberLong(e);
 259                 for (int nrOnes = 1; nrOnes < e; nrOnes++) {
 260                     long val = (1L << nrOnes) - 1;
 261                     // r specifies how much we rotate the value
 262                     for (int r = 0; r < e; r++) {
 263                         long immediate = (val >>> r | val << (e - r)) & mask;
 264                         // Duplicate pattern to fill whole 64bit range.
 265                         switch (logE) {
 266                             case 1:
 267                                 immediate |= immediate << 2;
 268                                 immediate |= immediate << 4;
 269                                 immediate |= immediate << 8;
 270                                 immediate |= immediate << 16;
 271                                 immediate |= immediate << 32;
 272                                 break;
 273                             case 2:
 274                                 immediate |= immediate << 4;
 275                                 immediate |= immediate << 8;
 276                                 immediate |= immediate << 16;
 277                                 immediate |= immediate << 32;
 278                                 break;
 279                             case 3:
 280                                 immediate |= immediate << 8;
 281                                 immediate |= immediate << 16;
 282                                 immediate |= immediate << 32;
 283                                 break;
 284                             case 4:
 285                                 immediate |= immediate << 16;
 286                                 immediate |= immediate << 32;
 287                                 break;
 288                             case 5:
 289                                 immediate |= immediate << 32;
 290                                 break;
 291                         }
 292                         // 5 - logE can underflow to -1, but we shift this bogus result
 293                         // out of the masked area.
 294                         int sizeEncoding = (1 << (5 - logE)) - 1;
 295                         int s = ((sizeEncoding << (logE + 1)) & 0x3f) | (nrOnes - 1);
 296                         table[nrImms++] = new Immediate(immediate, /* is64bit */e == 64, s, r);
 297                     }
 298                 }
 299             }
 300             Arrays.sort(table);
 301             assert nrImms == nrImmediates : nrImms + " instead of " + nrImmediates + " in table.";
 302             assert checkDuplicates(table) : "Duplicate values in table.";
 303             return table;
 304         }
 305 
 306         private static boolean checkDuplicates(Immediate[] table) {
 307             for (int i = 0; i < table.length - 1; i++) {
 308                 if (table[i].imm >= table[i + 1].imm) {
 309                     return false;
 310                 }
 311             }
 312             return true;
 313         }
 314     }
 315 
    // Bit offsets of the register fields inside an instruction. Each one is applied to
    // Register.encoding by the helper of the matching name below (rd, rs1, rs2, rs3, rt, rt2, rn).
    // Note that several fields share a position: Rd/Rt (0), Rs1/Rn (5) and Rs3/Rt2 (10).
    private static final int RdOffset = 0;
    private static final int Rs1Offset = 5;
    private static final int Rs2Offset = 16;
    private static final int Rs3Offset = 10;
    private static final int RtOffset = 0;
    private static final int RnOffset = 5;
    private static final int Rt2Offset = 10;
 323 
 324     /* Helper functions */
 325     private static int rd(Register reg) {
 326         return reg.encoding << RdOffset;
 327     }
 328 
 329     private static int rs1(Register reg) {
 330         return reg.encoding << Rs1Offset;
 331     }
 332 
 333     private static int rs2(Register reg) {
 334         return reg.encoding << Rs2Offset;
 335     }
 336 
 337     private static int rs3(Register reg) {
 338         return reg.encoding << Rs3Offset;
 339     }
 340 
 341     private static int rt(Register reg) {
 342         return reg.encoding << RtOffset;
 343     }
 344 
 345     private static int rt2(Register reg) {
 346         return reg.encoding << Rt2Offset;
 347     }
 348 
 349     private static int rn(Register reg) {
 350         return reg.encoding << RnOffset;
 351     }
 352 
 353     private static int maskField(int sizeInBits, int n) {
 354         assert NumUtil.isSignedNbit(sizeInBits, n);
 355         return n & NumUtil.getNbitNumberInt(sizeInBits);
 356     }
 357 
 358     /**
 359      * Enumeration of all different instruction kinds: General32/64 are the general instructions
 360      * (integer, branch, etc.), for 32-, respectively 64-bit operands. FP32/64 is the encoding for
 361      * the 32/64bit float operations
 362      */
 363     protected enum InstructionType {
 364         General32(0b00 << 30, 32, true),
 365         General64(0b10 << 30, 64, true),
 366         FP32(0x00000000, 32, false),
 367         FP64(0x00400000, 64, false);
 368 
 369         public final int encoding;
 370         public final int width;
 371         public final boolean isGeneral;
 372 
 373         InstructionType(int encoding, int width, boolean isGeneral) {
 374             this.encoding = encoding;
 375             this.width = width;
 376             this.isGeneral = isGeneral;
 377         }
 378 
 379         public static InstructionType generalFromSize(int size) {
 380             assert size == 32 || size == 64;
 381             return size == 32 ? General32 : General64;
 382         }
 383 
 384         public static InstructionType floatFromSize(int size) {
 385             assert size == 32 || size == 64;
 386             return size == 32 ? FP32 : FP64;
 387         }
 388 
 389     }
 390 
    // Opcode templates (names ending in "Op") and bit offsets of instruction fields (names ending
    // in "Offset"). The grouping comments below follow the constant names; most of the emitter
    // methods that combine these values are outside this part of the file.

    // Same values as the identically named constants in LogicalImmediateTable.
    private static final int ImmediateOffset = 10;
    private static final int ImmediateRotateOffset = 16;
    private static final int ImmediateSizeOffset = 22;
    private static final int ExtendTypeOffset = 13;

    // Add/subtract with immediate. AddSubSetFlag is OR-ed into ADD/SUB to form ADDS/SUBS in the
    // Instruction enum.
    private static final int AddSubImmOp = 0x11000000;
    private static final int AddSubShift12 = 0b01 << 22;
    private static final int AddSubSetFlag = 0x20000000;

    // Logical with immediate.
    private static final int LogicalImmOp = 0x12000000;

    // Move wide with immediate.
    private static final int MoveWideImmOp = 0x12800000;
    private static final int MoveWideImmOffset = 5;
    private static final int MoveWideShiftOffset = 21;

    // Bitfield with immediate.
    private static final int BitfieldImmOp = 0x13000000;

    // Add/subtract with shifted register.
    private static final int AddSubShiftedOp = 0x0B000000;
    private static final int ShiftTypeOffset = 22;

    // Add/subtract with extended register.
    private static final int AddSubExtendedOp = 0x0B200000;

    // Multiply and one-/two-source data processing.
    private static final int MulOp = 0x1B000000;
    private static final int DataProcessing1SourceOp = 0x5AC00000;
    private static final int DataProcessing2SourceOp = 0x1AC00000;

    // Floating-point operations with one, two and three sources.
    private static final int Fp1SourceOp = 0x1E204000;
    private static final int Fp2SourceOp = 0x1E200800;
    private static final int Fp3SourceOp = 0x1F000000;

    // Floating-point conversion and immediate.
    private static final int FpConvertOp = 0x1E200000;
    private static final int FpImmOp = 0x1E201000;
    private static final int FpImmOffset = 13;

    // Floating-point compare.
    private static final int FpCmpOp = 0x1E202000;

    // PC-relative addressing: the immediate is split into a high and a low part.
    private static final int PcRelImmHiOffset = 5;
    private static final int PcRelImmLoOffset = 29;

    private static final int PcRelImmOp = 0x10000000;

    // Branches.
    private static final int UnconditionalBranchImmOp = 0x14000000;
    private static final int UnconditionalBranchRegOp = 0xD6000000;
    private static final int CompareBranchOp = 0x34000000;

    private static final int ConditionalBranchImmOffset = 5;

    // Conditional select.
    private static final int ConditionalSelectOp = 0x1A800000;
    private static final int ConditionalConditionOffset = 12;

    // Loads and stores. The binary literals use underscores to separate groups of bits
    // (presumably the individual instruction fields - confirm against the architecture manual).
    private static final int LoadStoreScaledOp = 0b111_0_01_00 << 22;
    private static final int LoadStoreUnscaledOp = 0b111_0_00_00 << 22;

    private static final int LoadStoreRegisterOp = 0b111_0_00_00_1 << 21 | 0b10 << 10;

    private static final int LoadLiteralOp = 0x18000000;

    // Post- and pre-indexed forms differ only in the two bits at offset 10 (0b01 vs. 0b11).
    private static final int LoadStorePostIndexedOp = 0b111_0_00_00_0 << 21 | 0b01 << 10;
    private static final int LoadStorePreIndexedOp = 0b111_0_00_00_0 << 21 | 0b11 << 10;

    private static final int LoadStoreUnscaledImmOffset = 12;
    private static final int LoadStoreScaledImmOffset = 10;
    private static final int LoadStoreScaledRegOffset = 12;
    private static final int LoadStoreIndexedImmOffset = 12;
    private static final int LoadStoreTransferSizeOffset = 30;
    private static final int LoadStoreFpFlagOffset = 26;
    private static final int LoadLiteralImmeOffset = 5;

    // Load/store pair. The post-/pre-index variants are declared but marked as unused.
    private static final int LoadStorePairOp = 0b101_0 << 26;
    @SuppressWarnings("unused") private static final int LoadStorePairPostIndexOp = 0b101_0_001 << 23;
    @SuppressWarnings("unused") private static final int LoadStorePairPreIndexOp = 0b101_0_011 << 23;
    private static final int LoadStorePairImm7Offset = 15;

    // Logical with shifted register.
    private static final int LogicalShiftOp = 0x0A000000;

    // Exception generation and system instructions.
    private static final int ExceptionOp = 0xD4000000;
    private static final int SystemImmediateOffset = 5;

    @SuppressWarnings("unused") private static final int SimdImmediateOffset = 16;

    // Barriers.
    private static final int BarrierOp = 0xD503301F;
    private static final int BarrierKindOffset = 8;
 473 
    /**
     * Encoding for all instructions.
     * <p>
     * Each constant only stores the bits in {@link #encoding}; many constants share the same value
     * (for example {@code CBZ}, {@code B}, {@code LDR} and {@code ADD} are all 0), so the value on
     * its own does not identify an instruction. Presumably the emitter methods OR it with one of
     * the {@code *Op} templates of the enclosing class - those methods are not part of this
     * excerpt.
     * <p>
     * The declaration order determines the ordinals, so constants must not be reordered casually.
     */
    public enum Instruction {
        BCOND(0x54000000),
        CBNZ(0x01000000),
        CBZ(0x00000000),
        TBZ(0x36000000),
        TBNZ(0x37000000),

        B(0x00000000),
        BL(0x80000000),
        BR(0x001F0000),
        BLR(0x003F0000),
        RET(0x005F0000),

        // Note: LDAR and LDAXR are written with seven hex digits (leading zero omitted).
        LDR(0x00000000),
        LDRS(0x00800000),
        LDXR(0x081f7c00),
        LDAR(0x8dffc00),
        LDAXR(0x85ffc00),

        STR(0x00000000),
        STXR(0x08007c00),
        STLR(0x089ffc00),
        STLXR(0x0800fc00),

        LDP(0b1 << 22),
        STP(0b0 << 22),

        ADR(0x00000000),
        ADRP(0x80000000),

        // The flag-setting variants are the plain variants with AddSubSetFlag OR-ed in.
        ADD(0x00000000),
        ADDS(ADD.encoding | AddSubSetFlag),
        SUB(0x40000000),
        SUBS(SUB.encoding | AddSubSetFlag),

        // NOT is the modifier bit that is OR-ed into AND/ORR/EOR/ANDS to form BIC/ORN/EON/BICS.
        NOT(0x00200000),
        AND(0x00000000),
        BIC(AND.encoding | NOT.encoding),
        ORR(0x20000000),
        ORN(ORR.encoding | NOT.encoding),
        EOR(0x40000000),
        EON(EOR.encoding | NOT.encoding),
        ANDS(0x60000000),
        BICS(ANDS.encoding | NOT.encoding),

        ASRV(0x00002800),
        RORV(0x00002C00),
        LSRV(0x00002400),
        LSLV(0x00002000),

        CLS(0x00001400),
        CLZ(0x00001000),
        RBIT(0x00000000),
        REVX(0x00000C00),
        REVW(0x00000800),

        MOVN(0x00000000),
        MOVZ(0x40000000),
        MOVK(0x60000000),

        CSEL(0x00000000),
        CSNEG(0x40000400),
        CSINC(0x00000400),

        BFM(0x20000000),
        SBFM(0x00000000),
        UBFM(0x40000000),
        EXTR(0x13800000),

        MADD(0x00000000),
        MSUB(0x00008000),
        SDIV(0x00000C00),
        UDIV(0x00000800),

        FMOV(0x00000000),
        FMOVCPU2FPU(0x00070000),
        FMOVFPU2CPU(0x00060000),

        FCVTDS(0x00028000),
        FCVTSD(0x00020000),

        FCVTZS(0x00180000),
        SCVTF(0x00020000),

        FABS(0x00008000),
        FSQRT(0x00018000),
        FNEG(0x00010000),

        FRINTZ(0x00058000),

        FADD(0x00002000),
        FSUB(0x00003000),
        FMUL(0x00000000),
        FDIV(0x00001000),
        FMAX(0x00004000),
        FMIN(0x00005000),

        FMADD(0x00000000),
        FMSUB(0x00008000),

        // Unlike the other floating-point entries, FCCMP and FCSEL already include high-order
        // (0x1E2.....) bits in their value.
        FCMP(0x00000000),
        FCMPZERO(0x00000008),
        FCCMP(0x1E200400),
        FCSEL(0x1E200C00),

        INS(0x4e081c00),
        UMOV(0x4e083c00),

        // Note: CNT is written with seven hex digits (leading zero omitted).
        CNT(0xe205800),
        USRA(0x6f001400),

        HLT(0x00400000),
        BRK(0x00200000),

        CLREX(0xd5033f5f),
        HINT(0xD503201F),
        DMB(0x000000A0),

        BLR_NATIVE(0xc0000000);

        /** The bits this instruction contributes to the emitted instruction. */
        public final int encoding;

        Instruction(int encoding) {
            this.encoding = encoding;
        }

    }
 604 
 605     public enum ShiftType {
 606         LSL(0),
 607         LSR(1),
 608         ASR(2),
 609         ROR(3);
 610 
 611         public final int encoding;
 612 
 613         ShiftType(int encoding) {
 614             this.encoding = encoding;
 615         }
 616     }
 617 
 618     public enum ExtendType {
 619         UXTB(0),
 620         UXTH(1),
 621         UXTW(2),
 622         UXTX(3),
 623         SXTB(4),
 624         SXTH(5),
 625         SXTW(6),
 626         SXTX(7);
 627 
 628         public final int encoding;
 629 
 630         ExtendType(int encoding) {
 631             this.encoding = encoding;
 632         }
 633     }
 634 
 635     /**
 636      * Condition Flags for branches. See 4.3
 637      */
 638     public enum ConditionFlag {
 639         // Integer | Floating-point meanings
 640         /** Equal | Equal. */
 641         EQ(0x0),
 642 
 643         /** Not Equal | Not equal or unordered. */
 644         NE(0x1),
 645 
 646         /** Unsigned Higher or Same | Greater than, equal or unordered. */
 647         HS(0x2),
 648 
 649         /** Unsigned lower | less than. */
 650         LO(0x3),
 651 
 652         /** Minus (negative) | less than. */
 653         MI(0x4),
 654 
 655         /** Plus (positive or zero) | greater than, equal or unordered. */
 656         PL(0x5),
 657 
 658         /** Overflow set | unordered. */
 659         VS(0x6),
 660 
 661         /** Overflow clear | ordered. */
 662         VC(0x7),
 663 
 664         /** Unsigned higher | greater than or unordered. */
 665         HI(0x8),
 666 
 667         /** Unsigned lower or same | less than or equal. */
 668         LS(0x9),
 669 
 670         /** Signed greater than or equal | greater than or equal. */
 671         GE(0xA),
 672 
 673         /** Signed less than | less than or unordered. */
 674         LT(0xB),
 675 
 676         /** Signed greater than | greater than. */
 677         GT(0xC),
 678 
 679         /** Signed less than or equal | less than, equal or unordered. */
 680         LE(0xD),
 681 
 682         /** Always | always. */
 683         AL(0xE),
 684 
 685         /** Always | always (identical to AL, just to have valid 0b1111 encoding). */
 686         NV(0xF);
 687 
 688         public final int encoding;
 689 
 690         ConditionFlag(int encoding) {
 691             this.encoding = encoding;
 692         }
 693 
 694         /**
 695          * @return ConditionFlag specified by decoding.
 696          */
 697         public static ConditionFlag fromEncoding(int encoding) {
 698             return values()[encoding];
 699         }
 700 
 701         public ConditionFlag negate() {
 702             switch (this) {
 703                 case EQ:
 704                     return NE;
 705                 case NE:
 706                     return EQ;
 707                 case HS:
 708                     return LO;
 709                 case LO:
 710                     return HS;
 711                 case MI:
 712                     return PL;
 713                 case PL:
 714                     return MI;
 715                 case VS:
 716                     return VC;
 717                 case VC:
 718                     return VS;
 719                 case HI:
 720                     return LS;
 721                 case LS:
 722                     return HI;
 723                 case GE:
 724                     return LT;
 725                 case LT:
 726                     return GE;
 727                 case GT:
 728                     return LE;
 729                 case LE:
 730                     return GT;
 731                 case AL:
 732                 case NV:
 733                 default:
 734                     throw GraalError.shouldNotReachHere();
 735             }
 736         }
 737     }
 738 
    public AArch64Assembler(TargetDescription target) {
        // Nothing assembler-specific is initialized here; construction is delegated to the superclass.
        super(target);
    }
 742 
 743     /* Conditional Branch (5.2.1) */
 744 
 745     /**
 746      * Branch conditionally.
 747      *
 748      * @param condition may not be null.
 749      * @param imm21 Signed 21-bit offset, has to be word aligned.
 750      */
 751     protected void b(ConditionFlag condition, int imm21) {
 752         b(condition, imm21, -1);
 753     }
 754 
 755     /**
 756      * Branch conditionally. Inserts instruction into code buffer at pos.
 757      *
 758      * @param condition may not be null.
 759      * @param imm21 Signed 21-bit offset, has to be word aligned.
 760      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 761      */
 762     protected void b(ConditionFlag condition, int imm21, int pos) {
 763         if (pos == -1) {
 764             emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding);
 765         } else {
 766             emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding, pos);
 767         }
 768     }
 769 
 770     /**
 771      * Compare register and branch if non-zero.
 772      *
 773      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 774      * @param size Instruction size in bits. Should be either 32 or 64.
 775      * @param imm21 Signed 21-bit offset, has to be word aligned.
 776      */
 777     protected void cbnz(int size, Register reg, int imm21) {
 778         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, -1);
 779     }
 780 
 781     /**
 782      * Compare register and branch if non-zero.
 783      *
 784      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 785      * @param size Instruction size in bits. Should be either 32 or 64.
 786      * @param imm21 Signed 21-bit offset, has to be word aligned.
 787      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 788      */
 789     protected void cbnz(int size, Register reg, int imm21, int pos) {
 790         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, pos);
 791     }
 792 
 793     /**
 794      * Compare and branch if zero.
 795      *
 796      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 797      * @param size Instruction size in bits. Should be either 32 or 64.
 798      * @param imm21 Signed 21-bit offset, has to be word aligned.
 799      */
 800     protected void cbz(int size, Register reg, int imm21) {
 801         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, -1);
 802     }
 803 
 804     /**
 805      * Compare register and branch if zero.
 806      *
 807      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 808      * @param size Instruction size in bits. Should be either 32 or 64.
 809      * @param imm21 Signed 21-bit offset, has to be word aligned.
 810      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 811      */
 812     protected void cbz(int size, Register reg, int imm21, int pos) {
 813         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, pos);
 814     }
 815 
 816     /**
 817      * Test a single bit and branch if the bit is nonzero.
 818      *
 819      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 820      * @param uimm6 Unsigned 6-bit bit index.
 821      * @param imm16 signed 16 bit offset
 822      */
 823     protected void tbnz(Register reg, int uimm6, int imm16) {
 824         tbnz(reg, uimm6, imm16, -1);
 825     }
 826 
 827     /**
 828      * Test a single bit and branch if the bit is zero.
 829      *
 830      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 831      * @param uimm6 Unsigned 6-bit bit index.
 832      * @param imm16 signed 16 bit offset
 833      */
 834     protected void tbz(Register reg, int uimm6, int imm16) {
 835         tbz(reg, uimm6, imm16, -1);
 836     }
 837 
 838     /**
 839      * Test a single bit and branch if the bit is nonzero.
 840      *
 841      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 842      * @param uimm6 Unsigned 6-bit bit index.
 843      * @param imm16 signed 16 bit offset
 844      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 845      */
 846     protected void tbnz(Register reg, int uimm6, int imm16, int pos) {
 847         assert reg.getRegisterCategory().equals(CPU);
 848         assert NumUtil.isUnsignedNbit(6, uimm6);
 849         assert NumUtil.isSignedNbit(18, imm16);
 850         assert (imm16 & 3) == 0;
 851         // size bit is overloaded as top bit of uimm6 bit index
 852         int size = (((uimm6 >> 5) & 1) == 0 ? 32 : 64);
 853         // remaining 5 bits are encoded lower down
 854         int uimm5 = uimm6 >> 1;
 855         int offset = (imm16 & NumUtil.getNbitNumberInt(16)) >> 2;
 856         InstructionType type = generalFromSize(size);
 857         int encoding = type.encoding | TBNZ.encoding | (uimm5 << 19) | (offset << 5) | rd(reg);
 858         if (pos == -1) {
 859             emitInt(encoding);
 860         } else {
 861             emitInt(encoding, pos);
 862         }
 863     }
 864 
 865     /**
 866      * Test a single bit and branch if the bit is zero.
 867      *
 868      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 869      * @param uimm6 Unsigned 6-bit bit index.
 870      * @param imm16 signed 16 bit offset
 871      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 872      */
 873     protected void tbz(Register reg, int uimm6, int imm16, int pos) {
 874         assert reg.getRegisterCategory().equals(CPU);
 875         assert NumUtil.isUnsignedNbit(6, uimm6);
 876         assert NumUtil.isSignedNbit(18, imm16);
 877         assert (imm16 & 3) == 0;
 878         // size bit is overloaded as top bit of uimm6 bit index
 879         int size = (((uimm6 >> 5) & 1) == 0 ? 32 : 64);
 880         // remaining 5 bits are encoded lower down
 881         int uimm5 = uimm6 >> 1;
 882         int offset = (imm16 & NumUtil.getNbitNumberInt(16)) >> 2;
 883         InstructionType type = generalFromSize(size);
 884         int encoding = type.encoding | TBZ.encoding | (uimm5 << 19) | (offset << 5) | rd(reg);
 885         if (pos == -1) {
 886             emitInt(encoding);
 887         } else {
 888             emitInt(encoding, pos);
 889         }
 890     }
 891 
 892     private void conditionalBranchInstruction(Register reg, int imm21, InstructionType type, Instruction instr, int pos) {
 893         assert reg.getRegisterCategory().equals(CPU);
 894         int instrEncoding = instr.encoding | CompareBranchOp;
 895         if (pos == -1) {
 896             emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg));
 897         } else {
 898             emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg), pos);
 899         }
 900     }
 901 
 902     private static int getConditionalBranchImm(int imm21) {
 903         assert NumUtil.isSignedNbit(21, imm21) && (imm21 & 0x3) == 0 : "Immediate has to be 21bit signed number and word aligned";
 904         int imm = (imm21 & NumUtil.getNbitNumberInt(21)) >> 2;
 905         return imm << ConditionalBranchImmOffset;
 906     }
 907 
 908     /* Unconditional Branch (immediate) (5.2.2) */
 909 
    /**
     * Branch unconditionally.
     *
     * @param imm28 Signed 28-bit offset, has to be word aligned.
     */
 913     protected void b(int imm28) {
 914         unconditionalBranchImmInstruction(imm28, Instruction.B, -1);
 915     }
 916 
    /**
     * Branch unconditionally. Inserts instruction into code buffer at pos.
     *
     * @param imm28 Signed 28-bit offset, has to be word aligned.
     * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
     */
 922     protected void b(int imm28, int pos) {
 923         unconditionalBranchImmInstruction(imm28, Instruction.B, pos);
 924     }
 925 
 926     /**
 927      * Branch and link return address to register X30.
 928      *
 929      * @param imm28 Signed 28-bit offset, has to be word aligned.
 930      */
 931     public void bl(int imm28) {
 932         unconditionalBranchImmInstruction(imm28, Instruction.BL, -1);
 933     }
 934 
 935     private void unconditionalBranchImmInstruction(int imm28, Instruction instr, int pos) {
 936         assert NumUtil.isSignedNbit(28, imm28) && (imm28 & 0x3) == 0 : "Immediate has to be 28bit signed number and word aligned";
 937         int imm = (imm28 & NumUtil.getNbitNumberInt(28)) >> 2;
 938         int instrEncoding = instr.encoding | UnconditionalBranchImmOp;
 939         if (pos == -1) {
 940             emitInt(instrEncoding | imm);
 941         } else {
 942             emitInt(instrEncoding | imm, pos);
 943         }
 944     }
 945 
 946     /* Unconditional Branch (register) (5.2.3) */
 947 
 948     /**
 949      * Branches to address in register and writes return address into register X30.
 950      *
 951      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 952      */
 953     public void blr(Register reg) {
 954         unconditionalBranchRegInstruction(BLR, reg);
 955     }
 956 
 957     /**
 958      * Branches to address in register.
 959      *
 960      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 961      */
 962     protected void br(Register reg) {
 963         unconditionalBranchRegInstruction(BR, reg);
 964     }
 965 
 966     /**
 967      * Return to address in register.
 968      *
 969      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 970      */
 971     public void ret(Register reg) {
 972         unconditionalBranchRegInstruction(RET, reg);
 973     }
 974 
 975     private void unconditionalBranchRegInstruction(Instruction instr, Register reg) {
 976         assert reg.getRegisterCategory().equals(CPU);
 977         assert !reg.equals(zr);
 978         assert !reg.equals(sp);
 979         emitInt(instr.encoding | UnconditionalBranchRegOp | rs1(reg));
 980     }
 981 
 982     /* Load-Store Single Register (5.3.1) */
 983 
 984     /**
 985      * Loads a srcSize value from address into rt zero-extending it.
 986      *
 987      * @param srcSize size of memory read in bits. Must be 8, 16, 32 or 64.
 988      * @param rt general purpose register. May not be null or stackpointer.
 989      * @param address all addressing modes allowed. May not be null.
 990      */
 991     public void ldr(int srcSize, Register rt, AArch64Address address) {
 992         assert rt.getRegisterCategory().equals(CPU);
 993         assert srcSize == 8 || srcSize == 16 || srcSize == 32 || srcSize == 64;
 994         int transferSize = NumUtil.log2Ceil(srcSize / 8);
 995         loadStoreInstruction(LDR, rt, address, General32, transferSize);
 996     }
 997 
 998     /**
 999      * Loads a srcSize value from address into rt sign-extending it.
1000      *
1001      * @param targetSize size of target register in bits. Must be 32 or 64.
1002      * @param srcSize size of memory read in bits. Must be 8, 16 or 32, but may not be equivalent to
1003      *            targetSize.
1004      * @param rt general purpose register. May not be null or stackpointer.
1005      * @param address all addressing modes allowed. May not be null.
1006      */
1007     protected void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
1008         assert rt.getRegisterCategory().equals(CPU);
1009         assert (srcSize == 8 || srcSize == 16 || srcSize == 32) && srcSize != targetSize;
1010         int transferSize = NumUtil.log2Ceil(srcSize / 8);
1011         loadStoreInstruction(LDRS, rt, address, generalFromSize(targetSize), transferSize);
1012     }
1013 
1014     public enum PrefetchMode {
1015         PLDL1KEEP(0b00000),
1016         PLDL1STRM(0b00001),
1017         PLDL2KEEP(0b00010),
1018         PLDL2STRM(0b00011),
1019         PLDL3KEEP(0b00100),
1020         PLDL3STRM(0b00101),
1021 
1022         PLIL1KEEP(0b01000),
1023         PLIL1STRM(0b01001),
1024         PLIL2KEEP(0b01010),
1025         PLIL2STRM(0b01011),
1026         PLIL3KEEP(0b01100),
1027         PLIL3STRM(0b01101),
1028 
1029         PSTL1KEEP(0b10000),
1030         PSTL1STRM(0b10001),
1031         PSTL2KEEP(0b10010),
1032         PSTL2STRM(0b10011),
1033         PSTL3KEEP(0b10100),
1034         PSTL3STRM(0b10101);
1035 
1036         private final int encoding;
1037 
1038         PrefetchMode(int encoding) {
1039             this.encoding = encoding;
1040         }
1041 
1042         private static PrefetchMode[] modes = {
1043                         PLDL1KEEP,
1044                         PLDL1STRM,
1045                         PLDL2KEEP,
1046                         PLDL2STRM,
1047                         PLDL3KEEP,
1048                         PLDL3STRM,
1049 
1050                         null,
1051                         null,
1052 
1053                         PLIL1KEEP,
1054                         PLIL1STRM,
1055                         PLIL2KEEP,
1056                         PLIL2STRM,
1057                         PLIL3KEEP,
1058                         PLIL3STRM,
1059 
1060                         null,
1061                         null,
1062 
1063                         PSTL1KEEP,
1064                         PSTL1STRM,
1065                         PSTL2KEEP,
1066                         PSTL2STRM,
1067                         PSTL3KEEP,
1068                         PSTL3STRM
1069         };
1070 
1071         public static PrefetchMode lookup(int enc) {
1072             assert enc >= 00 && enc < modes.length;
1073             return modes[enc];
1074         }
1075 
1076         public Register toRegister() {
1077             return cpuRegisters.get(encoding);
1078         }
1079     }
1080 
    /**
     * Implements a prefetch at a 64-bit aligned address using a scaled 12-bit displacement, an
     * unscaled 9-bit displacement or a register offset addressing mode.
     *
     * @param address only IMMEDIATE_SCALED, IMMEDIATE_UNSCALED and REGISTER_OFFSET addressing modes
     *            allowed. May not be null.
     * @param mode prefetch operation to encode. May not be null.
     */
1089     public void prfm(AArch64Address address, PrefetchMode mode) {
1090         assert (address.getAddressingMode() == AddressingMode.IMMEDIATE_SCALED ||
1091                         address.getAddressingMode() == AddressingMode.IMMEDIATE_UNSCALED ||
1092                         address.getAddressingMode() == AddressingMode.REGISTER_OFFSET);
1093         assert mode != null;
1094         final int srcSize = 64;
1095         final int transferSize = NumUtil.log2Ceil(srcSize / 8);
1096         final Register rt = mode.toRegister();
1097         // this looks weird but that's because loadStoreInstruction is weird
1098         // instruction select fields are size [31:30], v [26] and opc [25:24]
1099         // prfm requires size == 0b11, v == 0b0 and opc == 0b11
1100         // passing LDRS ensures opc[1] == 0b1
1101         // (n.b. passing LDR/STR makes no difference to opc[1:0]!!)
1102         // passing General64 ensures opc[0] == 0b1 and v = 0b0
1103         // (n.b. passing General32 ensures opc[0] == 0b0 and v = 0b0)
1104         // srcSize 64 ensures size == 0b11
1105         loadStoreInstruction(LDRS, rt, address, General64, transferSize);
1106     }
1107 
1108     /**
1109      * Stores register rt into memory pointed by address.
1110      *
1111      * @param destSize number of bits written to memory. Must be 8, 16, 32 or 64.
1112      * @param rt general purpose register. May not be null or stackpointer.
1113      * @param address all addressing modes allowed. May not be null.
1114      */
1115     public void str(int destSize, Register rt, AArch64Address address) {
1116         assert rt.getRegisterCategory().equals(CPU);
1117         assert destSize == 8 || destSize == 16 || destSize == 32 || destSize == 64;
1118         int transferSize = NumUtil.log2Ceil(destSize / 8);
1119         loadStoreInstruction(STR, rt, address, General64, transferSize);
1120     }
1121 
1122     private void loadStoreInstruction(Instruction instr, Register reg, AArch64Address address, InstructionType type, int log2TransferSize) {
1123         assert log2TransferSize >= 0 && log2TransferSize < 4;
1124         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1125         int is32Bit = type.width == 32 ? 1 << ImmediateSizeOffset : 0;
1126         int isFloat = !type.isGeneral ? 1 << LoadStoreFpFlagOffset : 0;
1127         int memop = instr.encoding | transferSizeEncoding | is32Bit | isFloat | rt(reg);
1128         switch (address.getAddressingMode()) {
1129             case IMMEDIATE_SCALED:
1130                 emitInt(memop | LoadStoreScaledOp | address.getImmediate() << LoadStoreScaledImmOffset | rs1(address.getBase()));
1131                 break;
1132             case IMMEDIATE_UNSCALED:
1133                 emitInt(memop | LoadStoreUnscaledOp | address.getImmediate() << LoadStoreUnscaledImmOffset | rs1(address.getBase()));
1134                 break;
1135             case BASE_REGISTER_ONLY:
1136                 emitInt(memop | LoadStoreScaledOp | rs1(address.getBase()));
1137                 break;
1138             case EXTENDED_REGISTER_OFFSET:
1139             case REGISTER_OFFSET:
1140                 ExtendType extendType = address.getAddressingMode() == AddressingMode.EXTENDED_REGISTER_OFFSET ? address.getExtendType() : ExtendType.UXTX;
1141                 boolean shouldScale = address.isScaled() && log2TransferSize != 0;
1142                 emitInt(memop | LoadStoreRegisterOp | rs2(address.getOffset()) | extendType.encoding << ExtendTypeOffset | (shouldScale ? 1 : 0) << LoadStoreScaledRegOffset | rs1(address.getBase()));
1143                 break;
1144             case PC_LITERAL:
1145                 assert log2TransferSize >= 2 : "PC literal loads only works for load/stores of 32-bit and larger";
1146                 transferSizeEncoding = (log2TransferSize - 2) << LoadStoreTransferSizeOffset;
1147                 emitInt(transferSizeEncoding | isFloat | LoadLiteralOp | rd(reg) | address.getImmediate() << LoadLiteralImmeOffset);
1148                 break;
1149             case IMMEDIATE_POST_INDEXED:
1150                 emitInt(memop | LoadStorePostIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset);
1151                 break;
1152             case IMMEDIATE_PRE_INDEXED:
1153                 emitInt(memop | LoadStorePreIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset);
1154                 break;
1155             default:
1156                 throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode());
1157         }
1158     }
1159 
    /**
     * Load Pair of Registers calculates an address from a base register value and an immediate
     * offset, and loads two 32-bit words or two 64-bit doublewords from the calculated address
     * into two registers.
     */
1165     public void ldp(int size, Register rt, Register rt2, AArch64Address address) {
1166         assert size == 32 || size == 64;
1167         loadStorePairInstruction(LDP, rt, rt2, address, generalFromSize(size));
1168     }
1169 
1170     /**
1171      * Store Pair of Registers calculates an address from a base register value and an immediate
1172      * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from
1173      * two registers.
1174      */
1175     public void stp(int size, Register rt, Register rt2, AArch64Address address) {
1176         assert size == 32 || size == 64;
1177         loadStorePairInstruction(STP, rt, rt2, address, generalFromSize(size));
1178     }
1179 
1180     private void loadStorePairInstruction(Instruction instr, Register rt, Register rt2, AArch64Address address, InstructionType type) {
1181         int scaledOffset = maskField(7, address.getImmediateRaw());  // LDP/STP use a 7-bit scaled
1182                                                                      // offset
1183         int memop = type.encoding | instr.encoding | scaledOffset << LoadStorePairImm7Offset | rt2(rt2) | rn(address.getBase()) | rt(rt);
1184         switch (address.getAddressingMode()) {
1185             case IMMEDIATE_SCALED:
1186                 emitInt(memop | LoadStorePairOp | (0b010 << 23));
1187                 break;
1188             case IMMEDIATE_POST_INDEXED:
1189                 emitInt(memop | LoadStorePairOp | (0b001 << 23));
1190                 break;
1191             case IMMEDIATE_PRE_INDEXED:
1192                 emitInt(memop | LoadStorePairOp | (0b011 << 23));
1193                 break;
1194             default:
1195                 throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode());
1196         }
1197     }
1198 
1199     /* Load-Store Exclusive (5.3.6) */
1200 
1201     /**
1202      * Load address exclusive. Natural alignment of address is required.
1203      *
1204      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1205      * @param rt general purpose register. May not be null or stackpointer.
1206      * @param rn general purpose register.
1207      */
1208     protected void ldxr(int size, Register rt, Register rn) {
1209         assert size == 8 || size == 16 || size == 32 || size == 64;
1210         int transferSize = NumUtil.log2Ceil(size / 8);
1211         exclusiveLoadInstruction(LDXR, rt, rn, transferSize);
1212     }
1213 
1214     /**
1215      * Store address exclusive. Natural alignment of address is required. rs and rt may not point to
1216      * the same register.
1217      *
1218      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1219      * @param rs general purpose register. Set to exclusive access status. 0 means success,
1220      *            everything else failure. May not be null, or stackpointer.
1221      * @param rt general purpose register. May not be null or stackpointer.
1222      * @param rn general purpose register.
1223      */
1224     protected void stxr(int size, Register rs, Register rt, Register rn) {
1225         assert size == 8 || size == 16 || size == 32 || size == 64;
1226         int transferSize = NumUtil.log2Ceil(size / 8);
1227         exclusiveStoreInstruction(STXR, rs, rt, rn, transferSize);
1228     }
1229 
1230     /* Load-Acquire/Store-Release (5.3.7) */
1231 
1232     /* non exclusive access */
1233     /**
1234      * Load acquire. Natural alignment of address is required.
1235      *
1236      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1237      * @param rt general purpose register. May not be null or stackpointer.
1238      * @param rn general purpose register.
1239      */
1240     protected void ldar(int size, Register rt, Register rn) {
1241         assert size == 8 || size == 16 || size == 32 || size == 64;
1242         int transferSize = NumUtil.log2Ceil(size / 8);
1243         exclusiveLoadInstruction(LDAR, rt, rn, transferSize);
1244     }
1245 
1246     /**
1247      * Store-release. Natural alignment of address is required.
1248      *
1249      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1250      * @param rt general purpose register. May not be null or stackpointer.
1251      * @param rn general purpose register.
1252      */
1253     protected void stlr(int size, Register rt, Register rn) {
1254         assert size == 8 || size == 16 || size == 32 || size == 64;
1255         int transferSize = NumUtil.log2Ceil(size / 8);
1256         // Hack: Passing the zero-register means it is ignored when building the encoding.
1257         exclusiveStoreInstruction(STLR, r0, rt, rn, transferSize);
1258     }
1259 
1260     /* exclusive access */
1261     /**
1262      * Load acquire exclusive. Natural alignment of address is required.
1263      *
1264      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1265      * @param rt general purpose register. May not be null or stackpointer.
1266      * @param rn general purpose register.
1267      */
1268     public void ldaxr(int size, Register rt, Register rn) {
1269         assert size == 8 || size == 16 || size == 32 || size == 64;
1270         int transferSize = NumUtil.log2Ceil(size / 8);
1271         exclusiveLoadInstruction(LDAXR, rt, rn, transferSize);
1272     }
1273 
1274     /**
1275      * Store-release exclusive. Natural alignment of address is required. rs and rt may not point to
1276      * the same register.
1277      *
1278      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1279      * @param rs general purpose register. Set to exclusive access status. 0 means success,
1280      *            everything else failure. May not be null, or stackpointer.
1281      * @param rt general purpose register. May not be null or stackpointer.
1282      * @param rn general purpose register.
1283      */
1284     public void stlxr(int size, Register rs, Register rt, Register rn) {
1285         assert size == 8 || size == 16 || size == 32 || size == 64;
1286         int transferSize = NumUtil.log2Ceil(size / 8);
1287         exclusiveStoreInstruction(STLXR, rs, rt, rn, transferSize);
1288     }
1289 
1290     private void exclusiveLoadInstruction(Instruction instr, Register reg, Register rn, int log2TransferSize) {
1291         assert log2TransferSize >= 0 && log2TransferSize < 4;
1292         assert reg.getRegisterCategory().equals(CPU);
1293         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1294         emitInt(transferSizeEncoding | instr.encoding | 1 << ImmediateSizeOffset | rn(rn) | rt(reg));
1295     }
1296 
1297     /**
1298      * Stores data from rt into address and sets rs to the returned exclusive access status.
1299      *
1300      * @param rs general purpose register into which the exclusive access status is written. May not
1301      *            be null.
1302      * @param rt general purpose register containing data to be written to memory at address. May
1303      *            not be null
1304      * @param rn general purpose register containing the address specifying where rt is written to.
1305      * @param log2TransferSize log2Ceil of memory transfer size.
1306      */
1307     private void exclusiveStoreInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize) {
1308         assert log2TransferSize >= 0 && log2TransferSize < 4;
1309         assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt);
1310         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1311         emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt));
1312     }
1313 
1314     /* PC-relative Address Calculation (5.4.4) */
1315 
1316     /**
     * Address of page: sign extends 21-bit offset, shifts it left by 12 and adds it to the value of
     * the PC with its bottom 12-bits cleared, writing the result to dst.
1319      *
1320      * @param dst general purpose register. May not be null, zero-register or stackpointer.
1321      * @param imm Signed 33-bit offset with lower 12bits clear.
1322      */
1323     // protected void adrp(Register dst, long imm) {
1324     // assert (imm & NumUtil.getNbitNumberInt(12)) == 0 : "Lower 12-bit of immediate must be zero.";
1325     // assert NumUtil.isSignedNbit(33, imm);
1326     // addressCalculationInstruction(dst, (int) (imm >>> 12), Instruction.ADRP);
1327     // }
1328 
1329     /**
1330      * Adds a 21-bit signed offset to the program counter and writes the result to dst.
1331      *
1332      * @param dst general purpose register. May not be null, zero-register or stackpointer.
1333      * @param imm21 Signed 21-bit offset.
1334      */
1335     public void adr(Register dst, int imm21) {
1336         emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21));
1337     }
1338 
1339     public void adr(Register dst, int imm21, int pos) {
1340         emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21), pos);
1341     }
1342 
1343     private static int getPcRelativeImmEncoding(int imm21) {
1344         assert NumUtil.isSignedNbit(21, imm21);
1345         int imm = imm21 & NumUtil.getNbitNumberInt(21);
1346         // higher 19 bit
1347         int immHi = (imm >> 2) << PcRelImmHiOffset;
1348         // lower 2 bit
1349         int immLo = (imm & 0x3) << PcRelImmLoOffset;
1350         return immHi | immLo;
1351     }
1352 
1353     /* Arithmetic (Immediate) (5.4.1) */
1354 
1355     /**
1356      * dst = src + aimm.
1357      *
1358      * @param size register size. Has to be 32 or 64.
1359      * @param dst general purpose register. May not be null or zero-register.
1360      * @param src general purpose register. May not be null or zero-register.
1361      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1362      *            the lower 12-bit cleared.
1363      */
1364     protected void add(int size, Register dst, Register src, int aimm) {
1365         assert !dst.equals(zr);
1366         assert !src.equals(zr);
1367         addSubImmInstruction(ADD, dst, src, aimm, generalFromSize(size));
1368     }
1369 
1370     /**
1371      * dst = src + aimm and sets condition flags.
1372      *
1373      * @param size register size. Has to be 32 or 64.
1374      * @param dst general purpose register. May not be null or stackpointer.
1375      * @param src general purpose register. May not be null or zero-register.
1376      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1377      *            the lower 12-bit cleared.
1378      */
1379     protected void adds(int size, Register dst, Register src, int aimm) {
1380         assert !dst.equals(sp);
1381         assert !src.equals(zr);
1382         addSubImmInstruction(ADDS, dst, src, aimm, generalFromSize(size));
1383     }
1384 
1385     /**
1386      * dst = src - aimm.
1387      *
1388      * @param size register size. Has to be 32 or 64.
1389      * @param dst general purpose register. May not be null or zero-register.
1390      * @param src general purpose register. May not be null or zero-register.
1391      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1392      *            the lower 12-bit cleared.
1393      */
1394     protected void sub(int size, Register dst, Register src, int aimm) {
1395         assert !dst.equals(zr);
1396         assert !src.equals(zr);
1397         addSubImmInstruction(SUB, dst, src, aimm, generalFromSize(size));
1398     }
1399 
1400     /**
1401      * dst = src - aimm and sets condition flags.
1402      *
1403      * @param size register size. Has to be 32 or 64.
1404      * @param dst general purpose register. May not be null or stackpointer.
1405      * @param src general purpose register. May not be null or zero-register.
1406      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1407      *            the lower 12-bit cleared.
1408      */
1409     protected void subs(int size, Register dst, Register src, int aimm) {
1410         assert !dst.equals(sp);
1411         assert !src.equals(zr);
1412         addSubImmInstruction(SUBS, dst, src, aimm, generalFromSize(size));
1413     }
1414 
1415     private void addSubImmInstruction(Instruction instr, Register dst, Register src, int aimm, InstructionType type) {
1416         emitInt(type.encoding | instr.encoding | AddSubImmOp | encodeAimm(aimm) | rd(dst) | rs1(src));
1417     }
1418 
1419     /**
1420      * Encodes arithmetic immediate.
1421      *
1422      * @param imm Immediate has to be either an unsigned 12-bit value or an unsigned 24-bit value
1423      *            with the lower 12 bits zero.
1424      * @return Representation of immediate for use with arithmetic instructions.
1425      */
1426     private static int encodeAimm(int imm) {
1427         assert isAimm(imm) : "Immediate has to be legal arithmetic immediate value " + imm;
1428         if (NumUtil.isUnsignedNbit(12, imm)) {
1429             return imm << ImmediateOffset;
1430         } else {
1431             // First 12-bit are zero, so shift immediate 12-bit and set flag to indicate
1432             // shifted immediate value.
1433             return (imm >>> 12 << ImmediateOffset) | AddSubShift12;
1434         }
1435     }
1436 
1437     /**
1438      * Checks whether immediate can be encoded as an arithmetic immediate.
1439      *
1440      * @param imm Immediate has to be either an unsigned 12bit value or un unsigned 24bit value with
1441      *            the lower 12 bits 0.
1442      * @return true if valid arithmetic immediate, false otherwise.
1443      */
1444     protected static boolean isAimm(int imm) {
1445         return NumUtil.isUnsignedNbit(12, imm) || NumUtil.isUnsignedNbit(12, imm >>> 12) && (imm & 0xfff) == 0;
1446     }
1447 
1448     /* Logical (immediate) (5.4.2) */
1449 
1450     /**
1451      * dst = src & bimm.
1452      *
1453      * @param size register size. Has to be 32 or 64.
1454      * @param dst general purpose register. May not be null or zero-register.
1455      * @param src general purpose register. May not be null or stack-pointer.
1456      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1457      */
1458     public void and(int size, Register dst, Register src, long bimm) {
1459         assert !dst.equals(zr);
1460         assert !src.equals(sp);
1461         logicalImmInstruction(AND, dst, src, bimm, generalFromSize(size));
1462     }
1463 
1464     /**
1465      * dst = src & bimm and sets condition flags.
1466      *
1467      * @param size register size. Has to be 32 or 64.
1468      * @param dst general purpose register. May not be null or stack-pointer.
1469      * @param src general purpose register. May not be null or stack-pointer.
1470      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1471      */
1472     public void ands(int size, Register dst, Register src, long bimm) {
1473         assert !dst.equals(sp);
1474         assert !src.equals(sp);
1475         logicalImmInstruction(ANDS, dst, src, bimm, generalFromSize(size));
1476     }
1477 
1478     /**
1479      * dst = src ^ bimm.
1480      *
1481      * @param size register size. Has to be 32 or 64.
1482      * @param dst general purpose register. May not be null or zero-register.
1483      * @param src general purpose register. May not be null or stack-pointer.
1484      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1485      */
1486     public void eor(int size, Register dst, Register src, long bimm) {
1487         assert !dst.equals(zr);
1488         assert !src.equals(sp);
1489         logicalImmInstruction(EOR, dst, src, bimm, generalFromSize(size));
1490     }
1491 
1492     /**
1493      * dst = src | bimm.
1494      *
1495      * @param size register size. Has to be 32 or 64.
1496      * @param dst general purpose register. May not be null or zero-register.
1497      * @param src general purpose register. May not be null or stack-pointer.
1498      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1499      */
1500     protected void orr(int size, Register dst, Register src, long bimm) {
1501         assert !dst.equals(zr);
1502         assert !src.equals(sp);
1503         logicalImmInstruction(ORR, dst, src, bimm, generalFromSize(size));
1504     }
1505 
1506     private void logicalImmInstruction(Instruction instr, Register dst, Register src, long bimm, InstructionType type) {
1507         // Mask higher bits off, since we always pass longs around even for the 32-bit instruction.
1508         long bimmValue;
1509         if (type == General32) {
1510             assert (bimm >> 32) == 0 || (bimm >> 32) == -1L : "Higher order bits for 32-bit instruction must either all be 0 or 1.";
1511             bimmValue = bimm & NumUtil.getNbitNumberLong(32);
1512         } else {
1513             bimmValue = bimm;
1514         }
1515         int immEncoding = LogicalImmediateTable.getLogicalImmEncoding(type == General64, bimmValue);
1516         emitInt(type.encoding | instr.encoding | LogicalImmOp | immEncoding | rd(dst) | rs1(src));
1517     }
1518 
1519     /* Move (wide immediate) (5.4.3) */
1520 
1521     /**
1522      * dst = uimm16 << shiftAmt.
1523      *
1524      * @param size register size. Has to be 32 or 64.
1525      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1526      * @param uimm16 16-bit unsigned immediate
1527      * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller
1528      *            than size.
1529      */
1530     protected void movz(int size, Register dst, int uimm16, int shiftAmt) {
1531         moveWideImmInstruction(MOVZ, dst, uimm16, shiftAmt, generalFromSize(size));
1532     }
1533 
1534     /**
1535      * dst = ~(uimm16 << shiftAmt).
1536      *
1537      * @param size register size. Has to be 32 or 64.
1538      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1539      * @param uimm16 16-bit unsigned immediate
1540      * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller
1541      *            than size.
1542      */
1543     protected void movn(int size, Register dst, int uimm16, int shiftAmt) {
1544         moveWideImmInstruction(MOVN, dst, uimm16, shiftAmt, generalFromSize(size));
1545     }
1546 
1547     /**
1548      * dst<pos+15:pos> = uimm16.
1549      *
1550      * @param size register size. Has to be 32 or 64.
1551      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1552      * @param uimm16 16-bit unsigned immediate
1553      * @param pos position into which uimm16 is inserted. Can be any multiple of 16 smaller than
1554      *            size.
1555      */
1556     protected void movk(int size, Register dst, int uimm16, int pos) {
1557         moveWideImmInstruction(MOVK, dst, uimm16, pos, generalFromSize(size));
1558     }
1559 
1560     private void moveWideImmInstruction(Instruction instr, Register dst, int uimm16, int shiftAmt, InstructionType type) {
1561         assert dst.getRegisterCategory().equals(CPU);
1562         assert NumUtil.isUnsignedNbit(16, uimm16) : "Immediate has to be unsigned 16bit";
1563         assert shiftAmt == 0 || shiftAmt == 16 || (type == InstructionType.General64 && (shiftAmt == 32 || shiftAmt == 48)) : "Invalid shift amount: " + shiftAmt;
1564         int shiftValue = shiftAmt >> 4;
1565         emitInt(type.encoding | instr.encoding | MoveWideImmOp | rd(dst) | uimm16 << MoveWideImmOffset | shiftValue << MoveWideShiftOffset);
1566     }
1567 
1568     /* Bitfield Operations (5.4.5) */
1569 
1570     /**
1571      * Bitfield move.
1572      *
1573      * @param size register size. Has to be 32 or 64.
1574      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1575      * @param src general purpose register. May not be null, stackpointer or zero-register.
1576      * @param r must be in the range 0 to size - 1
1577      * @param s must be in the range 0 to size - 1
1578      */
1579     protected void bfm(int size, Register dst, Register src, int r, int s) {
1580         bitfieldInstruction(BFM, dst, src, r, s, generalFromSize(size));
1581     }
1582 
1583     /**
1584      * Unsigned bitfield move.
1585      *
1586      * @param size register size. Has to be 32 or 64.
1587      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1588      * @param src general purpose register. May not be null, stackpointer or zero-register.
1589      * @param r must be in the range 0 to size - 1
1590      * @param s must be in the range 0 to size - 1
1591      */
1592     protected void ubfm(int size, Register dst, Register src, int r, int s) {
1593         bitfieldInstruction(UBFM, dst, src, r, s, generalFromSize(size));
1594     }
1595 
1596     /**
1597      * Signed bitfield move.
1598      *
1599      * @param size register size. Has to be 32 or 64.
1600      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1601      * @param src general purpose register. May not be null, stackpointer or zero-register.
1602      * @param r must be in the range 0 to size - 1
1603      * @param s must be in the range 0 to size - 1
1604      */
1605     protected void sbfm(int size, Register dst, Register src, int r, int s) {
1606         bitfieldInstruction(SBFM, dst, src, r, s, generalFromSize(size));
1607     }
1608 
1609     private void bitfieldInstruction(Instruction instr, Register dst, Register src, int r, int s, InstructionType type) {
1610         assert !dst.equals(sp) && !dst.equals(zr);
1611         assert !src.equals(sp) && !src.equals(zr);
1612         assert s >= 0 && s < type.width && r >= 0 && r < type.width;
1613         int sf = type == General64 ? 1 << ImmediateSizeOffset : 0;
1614         emitInt(type.encoding | instr.encoding | BitfieldImmOp | sf | r << ImmediateRotateOffset | s << ImmediateOffset | rd(dst) | rs1(src));
1615     }
1616 
1617     /* Extract (Immediate) (5.4.6) */
1618 
1619     /**
1620      * Extract. dst = src1:src2<lsb+31:lsb>
1621      *
1622      * @param size register size. Has to be 32 or 64.
1623      * @param dst general purpose register. May not be null or stackpointer.
1624      * @param src1 general purpose register. May not be null or stackpointer.
1625      * @param src2 general purpose register. May not be null or stackpointer.
1626      * @param lsb must be in range 0 to size - 1.
1627      */
1628     protected void extr(int size, Register dst, Register src1, Register src2, int lsb) {
1629         assert !dst.equals(sp);
1630         assert !src1.equals(sp);
1631         assert !src2.equals(sp);
1632         InstructionType type = generalFromSize(size);
1633         assert lsb >= 0 && lsb < type.width;
1634         int sf = type == General64 ? 1 << ImmediateSizeOffset : 0;
1635         emitInt(type.encoding | EXTR.encoding | sf | lsb << ImmediateOffset | rd(dst) | rs1(src1) | rs2(src2));
1636     }
1637 
1638     /* Arithmetic (shifted register) (5.5.1) */
1639 
1640     /**
1641      * dst = src1 + shiftType(src2, imm).
1642      *
1643      * @param size register size. Has to be 32 or 64.
1644      * @param dst general purpose register. May not be null or stackpointer.
1645      * @param src1 general purpose register. May not be null or stackpointer.
1646      * @param src2 general purpose register. May not be null or stackpointer.
1647      * @param shiftType any type but ROR.
1648      * @param imm must be in range 0 to size - 1.
1649      */
1650     protected void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1651         addSubShiftedInstruction(ADD, dst, src1, src2, shiftType, imm, generalFromSize(size));
1652     }
1653 
1654     /**
1655      * dst = src1 + shiftType(src2, imm) and sets condition flags.
1656      *
1657      * @param size register size. Has to be 32 or 64.
1658      * @param dst general purpose register. May not be null or stackpointer.
1659      * @param src1 general purpose register. May not be null or stackpointer.
1660      * @param src2 general purpose register. May not be null or stackpointer.
1661      * @param shiftType any type but ROR.
1662      * @param imm must be in range 0 to size - 1.
1663      */
1664     public void adds(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1665         addSubShiftedInstruction(ADDS, dst, src1, src2, shiftType, imm, generalFromSize(size));
1666     }
1667 
1668     /**
1669      * dst = src1 - shiftType(src2, imm).
1670      *
1671      * @param size register size. Has to be 32 or 64.
1672      * @param dst general purpose register. May not be null or stackpointer.
1673      * @param src1 general purpose register. May not be null or stackpointer.
1674      * @param src2 general purpose register. May not be null or stackpointer.
1675      * @param shiftType any type but ROR.
1676      * @param imm must be in range 0 to size - 1.
1677      */
1678     protected void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1679         addSubShiftedInstruction(SUB, dst, src1, src2, shiftType, imm, generalFromSize(size));
1680     }
1681 
1682     /**
1683      * dst = src1 - shiftType(src2, imm) and sets condition flags.
1684      *
1685      * @param size register size. Has to be 32 or 64.
1686      * @param dst general purpose register. May not be null or stackpointer.
1687      * @param src1 general purpose register. May not be null or stackpointer.
1688      * @param src2 general purpose register. May not be null or stackpointer.
1689      * @param shiftType any type but ROR.
1690      * @param imm must be in range 0 to size - 1.
1691      */
1692     public void subs(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1693         addSubShiftedInstruction(SUBS, dst, src1, src2, shiftType, imm, generalFromSize(size));
1694     }
1695 
1696     private void addSubShiftedInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int imm, InstructionType type) {
1697         assert shiftType != ShiftType.ROR;
1698         assert imm >= 0 && imm < type.width;
1699         emitInt(type.encoding | instr.encoding | AddSubShiftedOp | imm << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1700     }
1701 
1702     /* Arithmetic (extended register) (5.5.2) */
1703     /**
1704      * dst = src1 + extendType(src2) << imm.
1705      *
1706      * @param size register size. Has to be 32 or 64.
1707      * @param dst general purpose register. May not be null or zero-register..
1708      * @param src1 general purpose register. May not be null or zero-register.
1709      * @param src2 general purpose register. May not be null or stackpointer.
1710      * @param extendType defines how src2 is extended to the same size as src1.
1711      * @param shiftAmt must be in range 0 to 4.
1712      */
1713     public void add(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1714         assert !dst.equals(zr);
1715         assert !src1.equals(zr);
1716         assert !src2.equals(sp);
1717         addSubExtendedInstruction(ADD, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1718     }
1719 
1720     /**
1721      * dst = src1 + extendType(src2) << imm and sets condition flags.
1722      *
1723      * @param size register size. Has to be 32 or 64.
1724      * @param dst general purpose register. May not be null or stackpointer..
1725      * @param src1 general purpose register. May not be null or zero-register.
1726      * @param src2 general purpose register. May not be null or stackpointer.
1727      * @param extendType defines how src2 is extended to the same size as src1.
1728      * @param shiftAmt must be in range 0 to 4.
1729      */
1730     protected void adds(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1731         assert !dst.equals(sp);
1732         assert !src1.equals(zr);
1733         assert !src2.equals(sp);
1734         addSubExtendedInstruction(ADDS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1735     }
1736 
1737     /**
1738      * dst = src1 - extendType(src2) << imm.
1739      *
1740      * @param size register size. Has to be 32 or 64.
1741      * @param dst general purpose register. May not be null or zero-register..
1742      * @param src1 general purpose register. May not be null or zero-register.
1743      * @param src2 general purpose register. May not be null or stackpointer.
1744      * @param extendType defines how src2 is extended to the same size as src1.
1745      * @param shiftAmt must be in range 0 to 4.
1746      */
1747     protected void sub(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1748         assert !dst.equals(zr);
1749         assert !src1.equals(zr);
1750         assert !src2.equals(sp);
1751         addSubExtendedInstruction(SUB, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1752     }
1753 
1754     /**
1755      * dst = src1 - extendType(src2) << imm and sets flags.
1756      *
1757      * @param size register size. Has to be 32 or 64.
1758      * @param dst general purpose register. May not be null or stackpointer..
1759      * @param src1 general purpose register. May not be null or zero-register.
1760      * @param src2 general purpose register. May not be null or stackpointer.
1761      * @param extendType defines how src2 is extended to the same size as src1.
1762      * @param shiftAmt must be in range 0 to 4.
1763      */
1764     public void subs(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1765         assert !dst.equals(sp);
1766         assert !src1.equals(zr);
1767         assert !src2.equals(sp);
1768         addSubExtendedInstruction(SUBS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1769     }
1770 
1771     private void addSubExtendedInstruction(Instruction instr, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt, InstructionType type) {
1772         assert shiftAmt >= 0 && shiftAmt <= 4;
1773         emitInt(type.encoding | instr.encoding | AddSubExtendedOp | shiftAmt << ImmediateOffset | extendType.encoding << ExtendTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1774     }
1775 
1776     /* Logical (shifted register) (5.5.3) */
1777     /**
1778      * dst = src1 & shiftType(src2, imm).
1779      *
1780      * @param size register size. Has to be 32 or 64.
1781      * @param dst general purpose register. May not be null or stackpointer.
1782      * @param src1 general purpose register. May not be null or stackpointer.
1783      * @param src2 general purpose register. May not be null or stackpointer.
1784      * @param shiftType all types allowed, may not be null.
1785      * @param shiftAmt must be in range 0 to size - 1.
1786      */
1787     protected void and(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1788         logicalRegInstruction(AND, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1789     }
1790 
1791     /**
1792      * dst = src1 & shiftType(src2, imm) and sets condition flags.
1793      *
1794      * @param size register size. Has to be 32 or 64.
1795      * @param dst general purpose register. May not be null or stackpointer.
1796      * @param src1 general purpose register. May not be null or stackpointer.
1797      * @param src2 general purpose register. May not be null or stackpointer.
1798      * @param shiftType all types allowed, may not be null.
1799      * @param shiftAmt must be in range 0 to size - 1.
1800      */
1801     protected void ands(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1802         logicalRegInstruction(ANDS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1803     }
1804 
1805     /**
1806      * dst = src1 & ~(shiftType(src2, imm)).
1807      *
1808      * @param size register size. Has to be 32 or 64.
1809      * @param dst general purpose register. May not be null or stackpointer.
1810      * @param src1 general purpose register. May not be null or stackpointer.
1811      * @param src2 general purpose register. May not be null or stackpointer.
1812      * @param shiftType all types allowed, may not be null.
1813      * @param shiftAmt must be in range 0 to size - 1.
1814      */
1815     protected void bic(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1816         logicalRegInstruction(BIC, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1817     }
1818 
1819     /**
1820      * dst = src1 & ~(shiftType(src2, imm)) and sets condition flags.
1821      *
1822      * @param size register size. Has to be 32 or 64.
1823      * @param dst general purpose register. May not be null or stackpointer.
1824      * @param src1 general purpose register. May not be null or stackpointer.
1825      * @param src2 general purpose register. May not be null or stackpointer.
1826      * @param shiftType all types allowed, may not be null.
1827      * @param shiftAmt must be in range 0 to size - 1.
1828      */
1829     protected void bics(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1830         logicalRegInstruction(BICS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1831     }
1832 
1833     /**
1834      * dst = src1 ^ ~(shiftType(src2, imm)).
1835      *
1836      * @param size register size. Has to be 32 or 64.
1837      * @param dst general purpose register. May not be null or stackpointer.
1838      * @param src1 general purpose register. May not be null or stackpointer.
1839      * @param src2 general purpose register. May not be null or stackpointer.
1840      * @param shiftType all types allowed, may not be null.
1841      * @param shiftAmt must be in range 0 to size - 1.
1842      */
1843     protected void eon(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1844         logicalRegInstruction(EON, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1845     }
1846 
1847     /**
1848      * dst = src1 ^ shiftType(src2, imm).
1849      *
1850      * @param size register size. Has to be 32 or 64.
1851      * @param dst general purpose register. May not be null or stackpointer.
1852      * @param src1 general purpose register. May not be null or stackpointer.
1853      * @param src2 general purpose register. May not be null or stackpointer.
1854      * @param shiftType all types allowed, may not be null.
1855      * @param shiftAmt must be in range 0 to size - 1.
1856      */
1857     protected void eor(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1858         logicalRegInstruction(EOR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1859     }
1860 
1861     /**
1862      * dst = src1 | shiftType(src2, imm).
1863      *
1864      * @param size register size. Has to be 32 or 64.
1865      * @param dst general purpose register. May not be null or stackpointer.
1866      * @param src1 general purpose register. May not be null or stackpointer.
1867      * @param src2 general purpose register. May not be null or stackpointer.
1868      * @param shiftType all types allowed, may not be null.
1869      * @param shiftAmt must be in range 0 to size - 1.
1870      */
1871     protected void orr(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1872         logicalRegInstruction(ORR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1873     }
1874 
1875     /**
1876      * dst = src1 | ~(shiftType(src2, imm)).
1877      *
1878      * @param size register size. Has to be 32 or 64.
1879      * @param dst general purpose register. May not be null or stackpointer.
1880      * @param src1 general purpose register. May not be null or stackpointer.
1881      * @param src2 general purpose register. May not be null or stackpointer.
1882      * @param shiftType all types allowed, may not be null.
1883      * @param shiftAmt must be in range 0 to size - 1.
1884      */
1885     protected void orn(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1886         logicalRegInstruction(ORN, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1887     }
1888 
1889     private void logicalRegInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt, InstructionType type) {
1890         assert !dst.equals(sp);
1891         assert !src1.equals(sp);
1892         assert !src2.equals(sp);
1893         assert shiftAmt >= 0 && shiftAmt < type.width;
1894         emitInt(type.encoding | instr.encoding | LogicalShiftOp | shiftAmt << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1895     }
1896 
1897     /* Variable Shift (5.5.4) */
1898     /**
1899      * dst = src1 >> (src2 & log2(size)).
1900      *
1901      * @param size register size. Has to be 32 or 64.
1902      * @param dst general purpose register. May not be null or stackpointer.
1903      * @param src1 general purpose register. May not be null or stackpointer.
1904      * @param src2 general purpose register. May not be null or stackpointer.
1905      */
1906     protected void asr(int size, Register dst, Register src1, Register src2) {
1907         dataProcessing2SourceOp(ASRV, dst, src1, src2, generalFromSize(size));
1908     }
1909 
1910     /**
1911      * dst = src1 << (src2 & log2(size)).
1912      *
1913      * @param size register size. Has to be 32 or 64.
1914      * @param dst general purpose register. May not be null or stackpointer.
1915      * @param src1 general purpose register. May not be null or stackpointer.
1916      * @param src2 general purpose register. May not be null or stackpointer.
1917      */
1918     protected void lsl(int size, Register dst, Register src1, Register src2) {
1919         dataProcessing2SourceOp(LSLV, dst, src1, src2, generalFromSize(size));
1920     }
1921 
1922     /**
1923      * dst = src1 >>> (src2 & log2(size)).
1924      *
1925      * @param size register size. Has to be 32 or 64.
1926      * @param dst general purpose register. May not be null or stackpointer.
1927      * @param src1 general purpose register. May not be null or stackpointer.
1928      * @param src2 general purpose register. May not be null or stackpointer.
1929      */
1930     protected void lsr(int size, Register dst, Register src1, Register src2) {
1931         dataProcessing2SourceOp(LSRV, dst, src1, src2, generalFromSize(size));
1932     }
1933 
1934     /**
1935      * dst = rotateRight(src1, (src2 & log2(size))).
1936      *
1937      * @param size register size. Has to be 32 or 64.
1938      * @param dst general purpose register. May not be null or stackpointer.
1939      * @param src1 general purpose register. May not be null or stackpointer.
1940      * @param src2 general purpose register. May not be null or stackpointer.
1941      */
1942     protected void ror(int size, Register dst, Register src1, Register src2) {
1943         dataProcessing2SourceOp(RORV, dst, src1, src2, generalFromSize(size));
1944     }
1945 
1946     /* Bit Operations (5.5.5) */
1947 
1948     /**
1949      * Counts leading sign bits. Sets Wd to the number of consecutive bits following the topmost bit
1950      * in dst, that are the same as the topmost bit. The count does not include the topmost bit
1951      * itself , so the result will be in the range 0 to size-1 inclusive.
1952      *
1953      * @param size register size. Has to be 32 or 64.
1954      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1955      * @param src source register. May not be null, zero-register or the stackpointer.
1956      */
1957     protected void cls(int size, Register dst, Register src) {
1958         dataProcessing1SourceOp(CLS, dst, src, generalFromSize(size));
1959     }
1960 
1961     /**
1962      * Counts leading zeros.
1963      *
1964      * @param size register size. Has to be 32 or 64.
1965      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1966      * @param src source register. May not be null, zero-register or the stackpointer.
1967      */
1968     public void clz(int size, Register dst, Register src) {
1969         dataProcessing1SourceOp(CLZ, dst, src, generalFromSize(size));
1970     }
1971 
1972     /**
1973      * Reverses bits.
1974      *
1975      * @param size register size. Has to be 32 or 64.
1976      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1977      * @param src source register. May not be null, zero-register or the stackpointer.
1978      */
1979     public void rbit(int size, Register dst, Register src) {
1980         dataProcessing1SourceOp(RBIT, dst, src, generalFromSize(size));
1981     }
1982 
1983     /**
1984      * Reverses bytes.
1985      *
1986      * @param size register size. Has to be 32 or 64.
1987      * @param dst general purpose register. May not be null or the stackpointer.
1988      * @param src source register. May not be null or the stackpointer.
1989      */
1990     public void rev(int size, Register dst, Register src) {
1991         if (size == 64) {
1992             dataProcessing1SourceOp(REVX, dst, src, generalFromSize(size));
1993         } else {
1994             assert size == 32;
1995             dataProcessing1SourceOp(REVW, dst, src, generalFromSize(size));
1996         }
1997     }
1998 
1999     /* Conditional Data Processing (5.5.6) */
2000 
2001     /**
2002      * Conditional select. dst = src1 if condition else src2.
2003      *
2004      * @param size register size. Has to be 32 or 64.
2005      * @param dst general purpose register. May not be null or the stackpointer.
2006      * @param src1 general purpose register. May not be null or the stackpointer.
2007      * @param src2 general purpose register. May not be null or the stackpointer.
2008      * @param condition any condition flag. May not be null.
2009      */
2010     protected void csel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2011         conditionalSelectInstruction(CSEL, dst, src1, src2, condition, generalFromSize(size));
2012     }
2013 
2014     /**
2015      * Conditional select negate. dst = src1 if condition else -src2.
2016      *
2017      * @param size register size. Has to be 32 or 64.
2018      * @param dst general purpose register. May not be null or the stackpointer.
2019      * @param src1 general purpose register. May not be null or the stackpointer.
2020      * @param src2 general purpose register. May not be null or the stackpointer.
2021      * @param condition any condition flag. May not be null.
2022      */
2023     protected void csneg(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2024         conditionalSelectInstruction(CSNEG, dst, src1, src2, condition, generalFromSize(size));
2025     }
2026 
2027     /**
2028      * Conditional increase. dst = src1 if condition else src2 + 1.
2029      *
2030      * @param size register size. Has to be 32 or 64.
2031      * @param dst general purpose register. May not be null or the stackpointer.
2032      * @param src1 general purpose register. May not be null or the stackpointer.
2033      * @param src2 general purpose register. May not be null or the stackpointer.
2034      * @param condition any condition flag. May not be null.
2035      */
2036     protected void csinc(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2037         conditionalSelectInstruction(CSINC, dst, src1, src2, condition, generalFromSize(size));
2038     }
2039 
2040     private void conditionalSelectInstruction(Instruction instr, Register dst, Register src1, Register src2, ConditionFlag condition, InstructionType type) {
2041         assert !dst.equals(sp);
2042         assert !src1.equals(sp);
2043         assert !src2.equals(sp);
2044         emitInt(type.encoding | instr.encoding | ConditionalSelectOp | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset);
2045     }
2046 
2047     /* Integer Multiply/Divide (5.6) */
2048 
2049     /**
2050      * dst = src1 * src2 + src3.
2051      *
2052      * @param size register size. Has to be 32 or 64.
2053      * @param dst general purpose register. May not be null or the stackpointer.
2054      * @param src1 general purpose register. May not be null or the stackpointer.
2055      * @param src2 general purpose register. May not be null or the stackpointer.
2056      * @param src3 general purpose register. May not be null or the stackpointer.
2057      */
2058     protected void madd(int size, Register dst, Register src1, Register src2, Register src3) {
2059         mulInstruction(MADD, dst, src1, src2, src3, generalFromSize(size));
2060     }
2061 
2062     /**
2063      * dst = src3 - src1 * src2.
2064      *
2065      * @param size register size. Has to be 32 or 64.
2066      * @param dst general purpose register. May not be null or the stackpointer.
2067      * @param src1 general purpose register. May not be null or the stackpointer.
2068      * @param src2 general purpose register. May not be null or the stackpointer.
2069      * @param src3 general purpose register. May not be null or the stackpointer.
2070      */
2071     protected void msub(int size, Register dst, Register src1, Register src2, Register src3) {
2072         mulInstruction(MSUB, dst, src1, src2, src3, generalFromSize(size));
2073     }
2074 
2075     /**
2076      * Signed multiply high. dst = (src1 * src2)[127:64]
2077      *
2078      * @param dst general purpose register. May not be null or the stackpointer.
2079      * @param src1 general purpose register. May not be null or the stackpointer.
2080      * @param src2 general purpose register. May not be null or the stackpointer.
2081      */
2082     protected void smulh(Register dst, Register src1, Register src2) {
2083         assert !dst.equals(sp);
2084         assert !src1.equals(sp);
2085         assert !src2.equals(sp);
2086         emitInt(0b10011011010 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2087     }
2088 
2089     /**
2090      * unsigned multiply high. dst = (src1 * src2)[127:64]
2091      *
2092      * @param dst general purpose register. May not be null or the stackpointer.
2093      * @param src1 general purpose register. May not be null or the stackpointer.
2094      * @param src2 general purpose register. May not be null or the stackpointer.
2095      */
2096     protected void umulh(Register dst, Register src1, Register src2) {
2097         assert !dst.equals(sp);
2098         assert !src1.equals(sp);
2099         assert !src2.equals(sp);
2100         emitInt(0b10011011110 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2101     }
2102 
2103     /**
2104      * unsigned multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2)
2105      *
2106      * @param dst general purpose register. May not be null or the stackpointer.
2107      * @param src1 general purpose register. May not be null or the stackpointer.
2108      * @param src2 general purpose register. May not be null or the stackpointer.
2109      * @param src3 general purpose register. May not be null or the stackpointer.
2110      */
2111     protected void umaddl(Register dst, Register src1, Register src2, Register src3) {
2112         assert !dst.equals(sp);
2113         assert !src1.equals(sp);
2114         assert !src2.equals(sp);
2115         assert !src3.equals(sp);
2116         emitInt(0b10011011101 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2117     }
2118 
2119     /**
2120      * signed multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2)
2121      *
2122      * @param dst general purpose register. May not be null or the stackpointer.
2123      * @param src1 general purpose register. May not be null or the stackpointer.
2124      * @param src2 general purpose register. May not be null or the stackpointer.
2125      * @param src3 general purpose register. May not be null or the stackpointer.
2126      */
2127     public void smaddl(Register dst, Register src1, Register src2, Register src3) {
2128         assert !dst.equals(sp);
2129         assert !src1.equals(sp);
2130         assert !src2.equals(sp);
2131         assert !src3.equals(sp);
2132         emitInt(0b10011011001 << 21 | dst.encoding | rs1(src1) | rs2(src2) | rs3(src3));
2133     }
2134 
2135     private void mulInstruction(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) {
2136         assert !dst.equals(sp);
2137         assert !src1.equals(sp);
2138         assert !src2.equals(sp);
2139         assert !src3.equals(sp);
2140         emitInt(type.encoding | instr.encoding | MulOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3));
2141     }
2142 
2143     /**
2144      * Signed divide. dst = src1 / src2.
2145      *
2146      * @param size register size. Has to be 32 or 64.
2147      * @param dst general purpose register. May not be null or the stackpointer.
2148      * @param src1 general purpose register. May not be null or the stackpointer.
2149      * @param src2 general purpose register. May not be null or the stackpointer.
2150      */
2151     public void sdiv(int size, Register dst, Register src1, Register src2) {
2152         dataProcessing2SourceOp(SDIV, dst, src1, src2, generalFromSize(size));
2153     }
2154 
2155     /**
2156      * Unsigned divide. dst = src1 / src2.
2157      *
2158      * @param size register size. Has to be 32 or 64.
2159      * @param dst general purpose register. May not be null or the stackpointer.
2160      * @param src1 general purpose register. May not be null or the stackpointer.
2161      * @param src2 general purpose register. May not be null or the stackpointer.
2162      */
2163     public void udiv(int size, Register dst, Register src1, Register src2) {
2164         dataProcessing2SourceOp(UDIV, dst, src1, src2, generalFromSize(size));
2165     }
2166 
2167     private void dataProcessing1SourceOp(Instruction instr, Register dst, Register src, InstructionType type) {
2168         emitInt(type.encoding | instr.encoding | DataProcessing1SourceOp | rd(dst) | rs1(src));
2169     }
2170 
2171     private void dataProcessing2SourceOp(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) {
2172         assert !dst.equals(sp);
2173         assert !src1.equals(sp);
2174         assert !src2.equals(sp);
2175         emitInt(type.encoding | instr.encoding | DataProcessing2SourceOp | rd(dst) | rs1(src1) | rs2(src2));
2176     }
2177 
2178     /* Floating point operations */
2179 
2180     /* Load-Store Single FP register (5.7.1.1) */
2181     /**
2182      * Floating point load.
2183      *
2184      * @param size number of bits read from memory into rt. Must be 32 or 64.
2185      * @param rt floating point register. May not be null.
2186      * @param address all addressing modes allowed. May not be null.
2187      */
2188     public void fldr(int size, Register rt, AArch64Address address) {
2189         assert rt.getRegisterCategory().equals(SIMD);
2190         assert size == 32 || size == 64;
2191         int transferSize = NumUtil.log2Ceil(size / 8);
2192         loadStoreInstruction(LDR, rt, address, InstructionType.FP32, transferSize);
2193     }
2194 
2195     /**
2196      * Floating point store.
2197      *
2198      * @param size number of bits read from memory into rt. Must be 32 or 64.
2199      * @param rt floating point register. May not be null.
2200      * @param address all addressing modes allowed. May not be null.
2201      */
2202     public void fstr(int size, Register rt, AArch64Address address) {
2203         assert rt.getRegisterCategory().equals(SIMD);
2204         assert size == 32 || size == 64;
2205         int transferSize = NumUtil.log2Ceil(size / 8);
2206         loadStoreInstruction(STR, rt, address, InstructionType.FP64, transferSize);
2207     }
2208 
2209     /* Floating-point Move (register) (5.7.2) */
2210 
2211     /**
2212      * Floating point move.
2213      *
2214      * @param size register size. Has to be 32 or 64.
2215      * @param dst floating point register. May not be null.
2216      * @param src floating point register. May not be null.
2217      */
2218     protected void fmov(int size, Register dst, Register src) {
2219         fpDataProcessing1Source(FMOV, dst, src, floatFromSize(size));
2220     }
2221 
2222     /**
2223      * Move size bits from floating point register unchanged to general purpose register.
2224      *
2225      * @param size number of bits read from memory into rt. Must be 32 or 64.
2226      * @param dst general purpose register. May not be null, stack-pointer or zero-register
2227      * @param src floating point register. May not be null.
2228      */
2229     protected void fmovFpu2Cpu(int size, Register dst, Register src) {
2230         assert dst.getRegisterCategory().equals(CPU);
2231         assert src.getRegisterCategory().equals(SIMD);
2232         fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVFPU2CPU);
2233     }
2234 
2235     /**
2236      * Move size bits from general purpose register unchanged to floating point register.
2237      *
2238      * @param size register size. Has to be 32 or 64.
2239      * @param dst floating point register. May not be null.
2240      * @param src general purpose register. May not be null or stack-pointer.
2241      */
2242     protected void fmovCpu2Fpu(int size, Register dst, Register src) {
2243         assert dst.getRegisterCategory().equals(SIMD);
2244         assert src.getRegisterCategory().equals(CPU);
2245         fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVCPU2FPU);
2246     }
2247 
2248     private void fmovCpuFpuInstruction(Register dst, Register src, boolean is64bit, Instruction instr) {
2249         int sf = is64bit ? FP64.encoding | General64.encoding : FP32.encoding | General32.encoding;
2250         emitInt(sf | instr.encoding | FpConvertOp | rd(dst) | rs1(src));
2251     }
2252 
2253     /* Floating-point Move (immediate) (5.7.3) */
2254 
2255     /**
2256      * Move immediate into register.
2257      *
2258      * @param size register size. Has to be 32 or 64.
2259      * @param dst floating point register. May not be null.
2260      * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be
2261      *            loaded, i.e. (float) imm == imm must be true. In all cases
2262      *            {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true
2263      *            depending on size.
2264      */
2265     protected void fmov(int size, Register dst, double imm) {
2266         assert dst.getRegisterCategory().equals(SIMD);
2267         InstructionType type = floatFromSize(size);
2268         int immEncoding;
2269         if (type == FP64) {
2270             immEncoding = getDoubleImmediate(imm);
2271         } else {
2272             assert imm == (float) imm : "float mov must use an immediate that can be represented using a float.";
2273             immEncoding = getFloatImmediate((float) imm);
2274         }
2275         emitInt(type.encoding | FMOV.encoding | FpImmOp | immEncoding | rd(dst));
2276     }
2277 
2278     private static int getDoubleImmediate(double imm) {
2279         assert isDoubleImmediate(imm);
2280         // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
2281         // 0000.0000.0000.0000.0000.0000.0000.0000
2282         long repr = Double.doubleToRawLongBits(imm);
2283         int a = (int) (repr >>> 63) << 7;
2284         int b = (int) ((repr >>> 61) & 0x1) << 6;
2285         int cToH = (int) (repr >>> 48) & 0x3f;
2286         return (a | b | cToH) << FpImmOffset;
2287     }
2288 
2289     protected static boolean isDoubleImmediate(double imm) {
2290         // Valid values will have the form:
2291         // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
2292         // 0000.0000.0000.0000.0000.0000.0000.0000
2293         long bits = Double.doubleToRawLongBits(imm);
2294         // lower 48 bits are cleared
2295         if ((bits & NumUtil.getNbitNumberLong(48)) != 0) {
2296             return false;
2297         }
2298         // bits[61..54] are all set or all cleared.
2299         long pattern = (bits >> 54) & NumUtil.getNbitNumberLong(7);
2300         if (pattern != 0 && pattern != NumUtil.getNbitNumberLong(7)) {
2301             return false;
2302         }
2303         // bits[62] and bits[61] are opposites.
2304         return ((bits ^ (bits << 1)) & (1L << 62)) != 0;
2305     }
2306 
2307     private static int getFloatImmediate(float imm) {
2308         assert isFloatImmediate(imm);
2309         // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
2310         int repr = Float.floatToRawIntBits(imm);
2311         int a = (repr >>> 31) << 7;
2312         int b = ((repr >>> 29) & 0x1) << 6;
2313         int cToH = (repr >>> 19) & NumUtil.getNbitNumberInt(6);
2314         return (a | b | cToH) << FpImmOffset;
2315     }
2316 
2317     protected static boolean isFloatImmediate(float imm) {
2318         // Valid values will have the form:
2319         // aBbb.bbbc.defg.h000.0000.0000.0000.0000
2320         int bits = Float.floatToRawIntBits(imm);
2321         // lower 20 bits are cleared.
2322         if ((bits & NumUtil.getNbitNumberInt(19)) != 0) {
2323             return false;
2324         }
2325         // bits[29..25] are all set or all cleared
2326         int pattern = (bits >> 25) & NumUtil.getNbitNumberInt(5);
2327         if (pattern != 0 && pattern != NumUtil.getNbitNumberInt(5)) {
2328             return false;
2329         }
2330         // bits[29] and bits[30] have to be opposite
2331         return ((bits ^ (bits << 1)) & (1 << 30)) != 0;
2332     }
2333 
2334     /* Convert Floating-point Precision (5.7.4.1) */
2335     /* Converts float to double and vice-versa */
2336 
2337     /**
2338      * Convert float to double and vice-versa.
2339      *
2340      * @param srcSize size of source register in bits.
2341      * @param dst floating point register. May not be null.
2342      * @param src floating point register. May not be null.
2343      */
2344     public void fcvt(int srcSize, Register dst, Register src) {
2345         if (srcSize == 32) {
2346             fpDataProcessing1Source(FCVTDS, dst, src, floatFromSize(srcSize));
2347         } else {
2348             fpDataProcessing1Source(FCVTSD, dst, src, floatFromSize(srcSize));
2349         }
2350     }
2351 
2352     /* Convert to Integer (5.7.4.2) */
2353 
2354     /**
2355      * Convert floating point to integer. Rounds towards zero.
2356      *
2357      * @param targetSize size of integer register. 32 or 64.
2358      * @param srcSize size of floating point register. 32 or 64.
2359      * @param dst general purpose register. May not be null, the zero-register or the stackpointer.
2360      * @param src floating point register. May not be null.
2361      */
2362     public void fcvtzs(int targetSize, int srcSize, Register dst, Register src) {
2363         assert !dst.equals(zr) && !dst.equals(sp);
2364         assert src.getRegisterCategory().equals(SIMD);
2365         fcvtCpuFpuInstruction(FCVTZS, dst, src, generalFromSize(targetSize), floatFromSize(srcSize));
2366     }
2367 
2368     /* Convert from Integer (5.7.4.2) */
2369     /**
2370      * Converts integer to floating point. Uses rounding mode defined by FCPR.
2371      *
2372      * @param targetSize size of floating point register. 32 or 64.
2373      * @param srcSize size of integer register. 32 or 64.
2374      * @param dst floating point register. May not be null.
2375      * @param src general purpose register. May not be null or the stackpointer.
2376      */
2377     public void scvtf(int targetSize, int srcSize, Register dst, Register src) {
2378         assert dst.getRegisterCategory().equals(SIMD);
2379         assert !src.equals(sp);
2380         fcvtCpuFpuInstruction(SCVTF, dst, src, floatFromSize(targetSize), generalFromSize(srcSize));
2381     }
2382 
2383     private void fcvtCpuFpuInstruction(Instruction instr, Register dst, Register src, InstructionType type1, InstructionType type2) {
2384         emitInt(type1.encoding | type2.encoding | instr.encoding | FpConvertOp | rd(dst) | rs1(src));
2385     }
2386 
2387     /* Floating-point Round to Integral (5.7.5) */
2388 
2389     /**
2390      * Rounds floating-point to integral. Rounds towards zero.
2391      *
2392      * @param size register size.
2393      * @param dst floating point register. May not be null.
2394      * @param src floating point register. May not be null.
2395      */
2396     protected void frintz(int size, Register dst, Register src) {
2397         fpDataProcessing1Source(FRINTZ, dst, src, floatFromSize(size));
2398     }
2399 
2400     /* Floating-point Arithmetic (1 source) (5.7.6) */
2401 
2402     /**
2403      * dst = |src|.
2404      *
2405      * @param size register size.
2406      * @param dst floating point register. May not be null.
2407      * @param src floating point register. May not be null.
2408      */
2409     public void fabs(int size, Register dst, Register src) {
2410         fpDataProcessing1Source(FABS, dst, src, floatFromSize(size));
2411     }
2412 
2413     /**
2414      * dst = -neg.
2415      *
2416      * @param size register size.
2417      * @param dst floating point register. May not be null.
2418      * @param src floating point register. May not be null.
2419      */
2420     public void fneg(int size, Register dst, Register src) {
2421         fpDataProcessing1Source(FNEG, dst, src, floatFromSize(size));
2422     }
2423 
2424     /**
2425      * dst = Sqrt(src).
2426      *
2427      * @param size register size.
2428      * @param dst floating point register. May not be null.
2429      * @param src floating point register. May not be null.
2430      */
2431     public void fsqrt(int size, Register dst, Register src) {
2432         fpDataProcessing1Source(FSQRT, dst, src, floatFromSize(size));
2433     }
2434 
2435     private void fpDataProcessing1Source(Instruction instr, Register dst, Register src, InstructionType type) {
2436         assert dst.getRegisterCategory().equals(SIMD);
2437         assert src.getRegisterCategory().equals(SIMD);
2438         emitInt(type.encoding | instr.encoding | Fp1SourceOp | rd(dst) | rs1(src));
2439     }
2440 
2441     /* Floating-point Arithmetic (2 source) (5.7.7) */
2442 
2443     /**
2444      * dst = src1 + src2.
2445      *
2446      * @param size register size.
2447      * @param dst floating point register. May not be null.
2448      * @param src1 floating point register. May not be null.
2449      * @param src2 floating point register. May not be null.
2450      */
2451     public void fadd(int size, Register dst, Register src1, Register src2) {
2452         fpDataProcessing2Source(FADD, dst, src1, src2, floatFromSize(size));
2453     }
2454 
2455     /**
2456      * dst = src1 - src2.
2457      *
2458      * @param size register size.
2459      * @param dst floating point register. May not be null.
2460      * @param src1 floating point register. May not be null.
2461      * @param src2 floating point register. May not be null.
2462      */
2463     public void fsub(int size, Register dst, Register src1, Register src2) {
2464         fpDataProcessing2Source(FSUB, dst, src1, src2, floatFromSize(size));
2465     }
2466 
2467     /**
2468      * dst = src1 * src2.
2469      *
2470      * @param size register size.
2471      * @param dst floating point register. May not be null.
2472      * @param src1 floating point register. May not be null.
2473      * @param src2 floating point register. May not be null.
2474      */
2475     public void fmul(int size, Register dst, Register src1, Register src2) {
2476         fpDataProcessing2Source(FMUL, dst, src1, src2, floatFromSize(size));
2477     }
2478 
2479     /**
2480      * dst = src1 / src2.
2481      *
2482      * @param size register size.
2483      * @param dst floating point register. May not be null.
2484      * @param src1 floating point register. May not be null.
2485      * @param src2 floating point register. May not be null.
2486      */
2487     public void fdiv(int size, Register dst, Register src1, Register src2) {
2488         fpDataProcessing2Source(FDIV, dst, src1, src2, floatFromSize(size));
2489     }
2490 
2491     private void fpDataProcessing2Source(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) {
2492         assert dst.getRegisterCategory().equals(SIMD);
2493         assert src1.getRegisterCategory().equals(SIMD);
2494         assert src2.getRegisterCategory().equals(SIMD);
2495         emitInt(type.encoding | instr.encoding | Fp2SourceOp | rd(dst) | rs1(src1) | rs2(src2));
2496     }
2497 
2498     /* Floating-point Multiply-Add (5.7.9) */
2499 
2500     /**
2501      * dst = src1 * src2 + src3.
2502      *
2503      * @param size register size.
2504      * @param dst floating point register. May not be null.
2505      * @param src1 floating point register. May not be null.
2506      * @param src2 floating point register. May not be null.
2507      * @param src3 floating point register. May not be null.
2508      */
2509     protected void fmadd(int size, Register dst, Register src1, Register src2, Register src3) {
2510         fpDataProcessing3Source(FMADD, dst, src1, src2, src3, floatFromSize(size));
2511     }
2512 
2513     /**
2514      * dst = src3 - src1 * src2.
2515      *
2516      * @param size register size.
2517      * @param dst floating point register. May not be null.
2518      * @param src1 floating point register. May not be null.
2519      * @param src2 floating point register. May not be null.
2520      * @param src3 floating point register. May not be null.
2521      */
2522     protected void fmsub(int size, Register dst, Register src1, Register src2, Register src3) {
2523         fpDataProcessing3Source(FMSUB, dst, src1, src2, src3, floatFromSize(size));
2524     }
2525 
2526     private void fpDataProcessing3Source(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) {
2527         assert dst.getRegisterCategory().equals(SIMD);
2528         assert src1.getRegisterCategory().equals(SIMD);
2529         assert src2.getRegisterCategory().equals(SIMD);
2530         assert src3.getRegisterCategory().equals(SIMD);
2531         emitInt(type.encoding | instr.encoding | Fp3SourceOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3));
2532     }
2533 
2534     /* Floating-point Comparison (5.7.10) */
2535 
2536     /**
2537      * Compares src1 to src2.
2538      *
2539      * @param size register size.
2540      * @param src1 floating point register. May not be null.
2541      * @param src2 floating point register. May not be null.
2542      */
2543     public void fcmp(int size, Register src1, Register src2) {
2544         assert src1.getRegisterCategory().equals(SIMD);
2545         assert src2.getRegisterCategory().equals(SIMD);
2546         InstructionType type = floatFromSize(size);
2547         emitInt(type.encoding | FCMP.encoding | FpCmpOp | rs1(src1) | rs2(src2));
2548     }
2549 
2550     /**
2551      * Conditional compare. NZCV = fcmp(src1, src2) if condition else uimm4.
2552      *
2553      * @param size register size.
2554      * @param src1 floating point register. May not be null.
2555      * @param src2 floating point register. May not be null.
2556      * @param uimm4 condition flags that are used if condition is false.
2557      * @param condition every condition allowed. May not be null.
2558      */
2559     public void fccmp(int size, Register src1, Register src2, int uimm4, ConditionFlag condition) {
2560         assert NumUtil.isUnsignedNbit(4, uimm4);
2561         assert src1.getRegisterCategory().equals(SIMD);
2562         assert src2.getRegisterCategory().equals(SIMD);
2563         InstructionType type = floatFromSize(size);
2564         emitInt(type.encoding | FCCMP.encoding | uimm4 | condition.encoding << ConditionalConditionOffset | rs1(src1) | rs2(src2));
2565     }
2566 
2567     /**
2568      * Compare register to 0.0 .
2569      *
2570      * @param size register size.
2571      * @param src floating point register. May not be null.
2572      */
2573     public void fcmpZero(int size, Register src) {
2574         assert src.getRegisterCategory().equals(SIMD);
2575         InstructionType type = floatFromSize(size);
2576         emitInt(type.encoding | FCMPZERO.encoding | FpCmpOp | rs1(src));
2577     }
2578 
2579     /* Floating-point Conditional Select (5.7.11) */
2580 
2581     /**
2582      * Conditional select. dst = src1 if condition else src2.
2583      *
2584      * @param size register size.
2585      * @param dst floating point register. May not be null.
2586      * @param src1 floating point register. May not be null.
2587      * @param src2 floating point register. May not be null.
2588      * @param condition every condition allowed. May not be null.
2589      */
2590     protected void fcsel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2591         assert dst.getRegisterCategory().equals(SIMD);
2592         assert src1.getRegisterCategory().equals(SIMD);
2593         assert src2.getRegisterCategory().equals(SIMD);
2594         InstructionType type = floatFromSize(size);
2595         emitInt(type.encoding | FCSEL.encoding | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset);
2596     }
2597 
2598     /* Debug exceptions (5.9.1.2) */
2599 
2600     /**
2601      * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
2602      * UNALLOCATED instruction.
2603      *
2604      * @param uimm16 Arbitrary 16-bit unsigned payload.
2605      */
2606     protected void hlt(int uimm16) {
2607         exceptionInstruction(HLT, uimm16);
2608     }
2609 
2610     /**
2611      * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
2612      * exception level.
2613      *
2614      * @param uimm16 Arbitrary 16-bit unsigned payload.
2615      */
2616     protected void brk(int uimm16) {
2617         exceptionInstruction(BRK, uimm16);
2618     }
2619 
2620     private void exceptionInstruction(Instruction instr, int uimm16) {
2621         assert NumUtil.isUnsignedNbit(16, uimm16);
2622         emitInt(instr.encoding | ExceptionOp | uimm16 << SystemImmediateOffset);
2623     }
2624 
2625     /* Architectural hints (5.9.4) */
2626     public enum SystemHint {
2627         NOP(0x0),
2628         YIELD(0x1),
2629         WFE(0x2),
2630         WFI(0x3),
2631         SEV(0x4),
2632         SEVL(0x5);
2633 
2634         private final int encoding;
2635 
2636         SystemHint(int encoding) {
2637             this.encoding = encoding;
2638         }
2639     }
2640 
2641     /**
2642      * Architectural hints.
2643      *
2644      * @param hint Can be any of the defined hints. May not be null.
2645      */
2646     protected void hint(SystemHint hint) {
2647         emitInt(HINT.encoding | hint.encoding << SystemImmediateOffset);
2648     }
2649 
2650     /**
2651      * Clear Exclusive: clears the local record of the executing processor that an address has had a
2652      * request for an exclusive access.
2653      */
2654     protected void clrex() {
2655         emitInt(CLREX.encoding);
2656     }
2657 
2658     /**
2659      * Possible barrier definitions for Aarch64. LOAD_LOAD and LOAD_STORE map to the same underlying
2660      * barrier.
2661      *
2662      * We only need synchronization across the inner shareable domain (see B2-90 in the Reference
2663      * documentation).
2664      */
2665     public enum BarrierKind {
2666         LOAD_LOAD(0x9, "ISHLD"),
2667         LOAD_STORE(0x9, "ISHLD"),
2668         STORE_STORE(0xA, "ISHST"),
2669         ANY_ANY(0xB, "ISH");
2670 
2671         public final int encoding;
2672         public final String optionName;
2673 
2674         BarrierKind(int encoding, String optionName) {
2675             this.encoding = encoding;
2676             this.optionName = optionName;
2677         }
2678     }
2679 
2680     /**
2681      * Data Memory Barrier.
2682      *
2683      * @param barrierKind barrier that is issued. May not be null.
2684      */
2685     public void dmb(BarrierKind barrierKind) {
2686         emitInt(DMB.encoding | BarrierOp | barrierKind.encoding << BarrierKindOffset);
2687     }
2688 
2689 }