1 /*
   2  * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.graalvm.compiler.asm.aarch64;
  24 
  25 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADD;
  26 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADDS;
  27 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADR;
  28 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.AND;
  29 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ANDS;
  30 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ASRV;
  31 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BFM;
  32 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BIC;
  33 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BICS;
  34 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BLR;
  35 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BR;
  36 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BRK;
  37 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLREX;
  38 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLS;
  39 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLZ;
  40 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSEL;
  41 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSINC;
  42 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSNEG;
  43 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.DMB;
  44 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EON;
  45 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EOR;
  46 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EXTR;
  47 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FABS;
  48 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FADD;
  49 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCCMP;
  50 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCMP;
  51 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCMPZERO;
  52 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCSEL;
  53 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTDS;
  54 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTSD;
  55 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTZS;
  56 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FDIV;
  57 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMADD;
  58 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMOV;
  59 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMSUB;
  60 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMUL;
  61 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FNEG;
  62 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FRINTZ;
  63 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FSQRT;
  64 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FSUB;
  65 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.HINT;
  66 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.HLT;
  67 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDAR;
  68 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDAXR;
  69 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDP;
  70 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDR;
  71 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDRS;
  72 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDXR;
  73 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LSLV;
  74 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LSRV;
  75 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MADD;
  76 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVK;
  77 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVN;
  78 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVZ;
  79 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MSUB;
  80 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ORN;
  81 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ORR;
  82 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RBIT;
  83 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RET;
  84 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.REVW;
  85 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.REVX;
  86 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RORV;
  87 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SBFM;
  88 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SCVTF;
  89 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SDIV;
  90 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STLR;
  91 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STLXR;
  92 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STP;
  93 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STR;
  94 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STXR;
  95 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUB;
  96 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUBS;
  97 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UBFM;
  98 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UDIV;
  99 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP32;
 100 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP64;
 101 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General32;
 102 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General64;
 103 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.floatFromSize;
 104 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.generalFromSize;
 105 import static jdk.vm.ci.aarch64.AArch64.CPU;
 106 import static jdk.vm.ci.aarch64.AArch64.SIMD;
 107 import static jdk.vm.ci.aarch64.AArch64.r0;
 108 import static jdk.vm.ci.aarch64.AArch64.sp;
 109 import static jdk.vm.ci.aarch64.AArch64.zr;
 110 
 111 import java.util.Arrays;
 112 
 113 import org.graalvm.compiler.asm.Assembler;
 114 import org.graalvm.compiler.core.common.NumUtil;
 115 import org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode;
 116 import org.graalvm.compiler.debug.GraalError;
 117 
 118 import jdk.vm.ci.code.Register;
 119 import jdk.vm.ci.code.TargetDescription;
 120 
 121 public abstract class AArch64Assembler extends Assembler {
 122 
 123     public static class LogicalImmediateTable {
 124 
 125         private static final Immediate[] IMMEDIATE_TABLE = buildImmediateTable();
 126 
 127         private static final int ImmediateOffset = 10;
 128         private static final int ImmediateRotateOffset = 16;
 129         private static final int ImmediateSizeOffset = 22;
 130 
 131         /**
 132          * Specifies whether immediate can be represented in all cases (YES), as a 64bit instruction
 133          * (SIXTY_FOUR_BIT_ONLY) or not at all (NO).
 134          */
 135         enum Representable {
 136             YES,
 137             SIXTY_FOUR_BIT_ONLY,
 138             NO
 139         }
 140 
 141         /**
 142          * Tests whether an immediate can be encoded for logical instructions.
 143          *
 144          * @param is64bit if true immediate is considered a 64-bit pattern. If false we may use a
 145          *            64-bit instruction to load the 32-bit pattern into a register.
 146          * @return enum specifying whether immediate can be used for 32- and 64-bit logical
 147          *         instructions ({@code #Representable.YES}), for 64-bit instructions only (
 148          *         {@link Representable#SIXTY_FOUR_BIT_ONLY}) or not at all (
 149          *         {@link Representable#NO}).
 150          */
 151         public static Representable isRepresentable(boolean is64bit, long immediate) {
 152             int pos = getLogicalImmTablePos(is64bit, immediate);
 153             if (pos < 0) {
 154                 // if 32bit instruction we can try again as 64bit immediate which may succeed.
 155                 // i.e. 0xffffffff fails as a 32bit immediate but works as 64bit one.
 156                 if (!is64bit) {
 157                     assert NumUtil.isUnsignedNbit(32, immediate);
 158                     pos = getLogicalImmTablePos(true, immediate);
 159                     return pos >= 0 ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.NO;
 160                 }
 161                 return Representable.NO;
 162             }
 163             Immediate imm = IMMEDIATE_TABLE[pos];
 164             return imm.only64bit() ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.YES;
 165         }
 166 
 167         public static Representable isRepresentable(int immediate) {
 168             return isRepresentable(false, immediate & 0xFFFF_FFFFL);
 169         }
 170 
 171         public static int getLogicalImmEncoding(boolean is64bit, long value) {
 172             int pos = getLogicalImmTablePos(is64bit, value);
 173             assert pos >= 0 : "Value cannot be represented as logical immediate: " + value + ", is64bit=" + is64bit;
 174             Immediate imm = IMMEDIATE_TABLE[pos];
 175             assert is64bit || !imm.only64bit() : "Immediate can only be represented for 64bit, but 32bit instruction specified";
 176             return IMMEDIATE_TABLE[pos].encoding;
 177         }
 178 
 179         /**
 180          * @param is64bit if true also allow 64-bit only encodings to be returned.
 181          * @return If positive the return value is the position into the IMMEDIATE_TABLE for the
 182          *         given immediate, if negative the immediate cannot be encoded.
 183          */
 184         private static int getLogicalImmTablePos(boolean is64bit, long value) {
 185             Immediate imm;
 186             if (!is64bit) {
 187                 // 32bit instructions can only have 32bit immediates.
 188                 if (!NumUtil.isUnsignedNbit(32, value)) {
 189                     return -1;
 190                 }
 191                 // If we have a 32bit instruction (and therefore immediate) we have to duplicate it
 192                 // across 64bit to find it in the table.
 193                 imm = new Immediate(value << 32 | value);
 194             } else {
 195                 imm = new Immediate(value);
 196             }
 197             int pos = Arrays.binarySearch(IMMEDIATE_TABLE, imm);
 198             if (pos < 0) {
 199                 return -1;
 200             }
 201             if (!is64bit && IMMEDIATE_TABLE[pos].only64bit()) {
 202                 return -1;
 203             }
 204             return pos;
 205         }
 206 
 207         /**
 208          * To quote 5.4.2: [..] an immediate is a 32 or 64 bit pattern viewed as a vector of
 209          * identical elements of size e = 2, 4, 8, 16, 32 or (in the case of bimm64) 64 bits. Each
 210          * element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by
 211          * 0 to e-1 bits. It is encoded in the following: 10-16: rotation amount (6bit) starting
 212          * from 1s in the LSB (i.e. 0111->1011->1101->1110) 16-22: This stores a combination of the
 213          * number of set bits and the pattern size. The pattern size is encoded as follows (x is
 214          * used to store the number of 1 bits - 1) e pattern 2 1111xx 4 1110xx 8 110xxx 16 10xxxx 32
 215          * 0xxxxx 64 xxxxxx 22: if set we have an instruction with 64bit pattern?
 216          */
 217         private static final class Immediate implements Comparable<Immediate> {
 218             public final long imm;
 219             public final int encoding;
 220 
 221             Immediate(long imm, boolean is64, int s, int r) {
 222                 this.imm = imm;
 223                 this.encoding = computeEncoding(is64, s, r);
 224             }
 225 
 226             // Used to be able to binary search for an immediate in the table.
 227             Immediate(long imm) {
 228                 this(imm, false, 0, 0);
 229             }
 230 
 231             /**
 232              * Returns true if this pattern is only representable as 64bit.
 233              */
 234             public boolean only64bit() {
 235                 return (encoding & (1 << ImmediateSizeOffset)) != 0;
 236             }
 237 
 238             private static int computeEncoding(boolean is64, int s, int r) {
 239                 int sf = is64 ? 1 : 0;
 240                 return sf << ImmediateSizeOffset | r << ImmediateRotateOffset | s << ImmediateOffset;
 241             }
 242 
 243             @Override
 244             public int compareTo(Immediate o) {
 245                 return Long.compare(imm, o.imm);
 246             }
 247         }
 248 
 249         private static Immediate[] buildImmediateTable() {
 250             final int nrImmediates = 5334;
 251             final Immediate[] table = new Immediate[nrImmediates];
 252             int nrImms = 0;
 253             for (int logE = 1; logE <= 6; logE++) {
 254                 int e = 1 << logE;
 255                 long mask = NumUtil.getNbitNumberLong(e);
 256                 for (int nrOnes = 1; nrOnes < e; nrOnes++) {
 257                     long val = (1L << nrOnes) - 1;
 258                     // r specifies how much we rotate the value
 259                     for (int r = 0; r < e; r++) {
 260                         long immediate = (val >>> r | val << (e - r)) & mask;
 261                         // Duplicate pattern to fill whole 64bit range.
 262                         switch (logE) {
 263                             case 1:
 264                                 immediate |= immediate << 2;
 265                                 immediate |= immediate << 4;
 266                                 immediate |= immediate << 8;
 267                                 immediate |= immediate << 16;
 268                                 immediate |= immediate << 32;
 269                                 break;
 270                             case 2:
 271                                 immediate |= immediate << 4;
 272                                 immediate |= immediate << 8;
 273                                 immediate |= immediate << 16;
 274                                 immediate |= immediate << 32;
 275                                 break;
 276                             case 3:
 277                                 immediate |= immediate << 8;
 278                                 immediate |= immediate << 16;
 279                                 immediate |= immediate << 32;
 280                                 break;
 281                             case 4:
 282                                 immediate |= immediate << 16;
 283                                 immediate |= immediate << 32;
 284                                 break;
 285                             case 5:
 286                                 immediate |= immediate << 32;
 287                                 break;
 288                         }
 289                         // 5 - logE can underflow to -1, but we shift this bogus result
 290                         // out of the masked area.
 291                         int sizeEncoding = (1 << (5 - logE)) - 1;
 292                         int s = ((sizeEncoding << (logE + 1)) & 0x3f) | (nrOnes - 1);
 293                         table[nrImms++] = new Immediate(immediate, /* is64bit */e == 64, s, r);
 294                     }
 295                 }
 296             }
 297             Arrays.sort(table);
 298             assert nrImms == nrImmediates : nrImms + " instead of " + nrImmediates + " in table.";
 299             assert checkDuplicates(table) : "Duplicate values in table.";
 300             return table;
 301         }
 302 
 303         private static boolean checkDuplicates(Immediate[] table) {
 304             for (int i = 0; i < table.length - 1; i++) {
 305                 if (table[i].imm >= table[i + 1].imm) {
 306                     return false;
 307                 }
 308             }
 309             return true;
 310         }
 311     }
 312 
    // Left-shift amounts used by the register helper methods (rd, rs1, rs2, rs3, rt, rt2, rn) to
    // place a register encoding inside an instruction word. Several fields share a position:
    // Rd/Rt at bit 0, Rs1/Rn at bit 5 and Rs3/Rt2 at bit 10.
    private static final int RdOffset = 0;
    private static final int Rs1Offset = 5;
    private static final int Rs2Offset = 16;
    private static final int Rs3Offset = 10;
    private static final int RtOffset = 0;
    private static final int RnOffset = 5;
    private static final int Rt2Offset = 10;
 320 
 321     /* Helper functions */
 322     private static int rd(Register reg) {
 323         return reg.encoding << RdOffset;
 324     }
 325 
 326     private static int rs1(Register reg) {
 327         return reg.encoding << Rs1Offset;
 328     }
 329 
 330     private static int rs2(Register reg) {
 331         return reg.encoding << Rs2Offset;
 332     }
 333 
 334     private static int rs3(Register reg) {
 335         return reg.encoding << Rs3Offset;
 336     }
 337 
 338     private static int rt(Register reg) {
 339         return reg.encoding << RtOffset;
 340     }
 341 
 342     private static int rt2(Register reg) {
 343         return reg.encoding << Rt2Offset;
 344     }
 345 
 346     private static int rn(Register reg) {
 347         return reg.encoding << RnOffset;
 348     }
 349 
 350     private static int maskField(int sizeInBits, int n) {
 351         assert NumUtil.isSignedNbit(sizeInBits, n);
 352         return n & NumUtil.getNbitNumberInt(sizeInBits);
 353     }
 354 
 355     /**
 356      * Enumeration of all different instruction kinds: General32/64 are the general instructions
 357      * (integer, branch, etc.), for 32-, respectively 64-bit operands. FP32/64 is the encoding for
 358      * the 32/64bit float operations
 359      */
 360     protected enum InstructionType {
 361         General32(0b00 << 30, 32, true),
 362         General64(0b10 << 30, 64, true),
 363         FP32(0x00000000, 32, false),
 364         FP64(0x00400000, 64, false);
 365 
 366         public final int encoding;
 367         public final int width;
 368         public final boolean isGeneral;
 369 
 370         InstructionType(int encoding, int width, boolean isGeneral) {
 371             this.encoding = encoding;
 372             this.width = width;
 373             this.isGeneral = isGeneral;
 374         }
 375 
 376         public static InstructionType generalFromSize(int size) {
 377             assert size == 32 || size == 64;
 378             return size == 32 ? General32 : General64;
 379         }
 380 
 381         public static InstructionType floatFromSize(int size) {
 382             assert size == 32 || size == 64;
 383             return size == 32 ? FP32 : FP64;
 384         }
 385 
 386     }
 387 
    // Opcode bit patterns (names ending in "Op") and bit positions (names ending in "Offset") for
    // the instruction classes this assembler emits.
    // NOTE(review): most of these are consumed by emit helpers outside this part of the file;
    // confirm the exact use there before changing a value.

    // Same values as the identically named constants inside LogicalImmediateTable.
    private static final int ImmediateOffset = 10;
    private static final int ImmediateRotateOffset = 16;
    private static final int ImmediateSizeOffset = 22;
    private static final int ExtendTypeOffset = 13;

    private static final int AddSubImmOp = 0x11000000;
    private static final int AddSubShift12 = 0b01 << 22;
    // OR-ed into ADD/SUB to form ADDS/SUBS in the Instruction enum.
    private static final int AddSubSetFlag = 0x20000000;

    private static final int LogicalImmOp = 0x12000000;

    private static final int MoveWideImmOp = 0x12800000;
    private static final int MoveWideImmOffset = 5;
    private static final int MoveWideShiftOffset = 21;

    private static final int BitfieldImmOp = 0x13000000;

    private static final int AddSubShiftedOp = 0x0B000000;
    private static final int ShiftTypeOffset = 22;

    private static final int AddSubExtendedOp = 0x0B200000;

    private static final int MulOp = 0x1B000000;
    private static final int DataProcessing1SourceOp = 0x5AC00000;
    private static final int DataProcessing2SourceOp = 0x1AC00000;

    private static final int Fp1SourceOp = 0x1E204000;
    private static final int Fp2SourceOp = 0x1E200800;
    private static final int Fp3SourceOp = 0x1F000000;

    private static final int FpConvertOp = 0x1E200000;
    private static final int FpImmOp = 0x1E201000;
    private static final int FpImmOffset = 13;

    private static final int FpCmpOp = 0x1E202000;

    private static final int PcRelImmHiOffset = 5;
    private static final int PcRelImmLoOffset = 29;

    private static final int PcRelImmOp = 0x10000000;

    private static final int UnconditionalBranchImmOp = 0x14000000;
    private static final int UnconditionalBranchRegOp = 0xD6000000;
    private static final int CompareBranchOp = 0x34000000;

    private static final int ConditionalBranchImmOffset = 5;

    private static final int ConditionalSelectOp = 0x1A800000;
    private static final int ConditionalConditionOffset = 12;

    private static final int LoadStoreScaledOp = 0b111_0_01_00 << 22;
    private static final int LoadStoreUnscaledOp = 0b111_0_00_00 << 22;

    private static final int LoadStoreRegisterOp = 0b111_0_00_00_1 << 21 | 0b10 << 10;

    private static final int LoadLiteralOp = 0x18000000;

    // Post- and pre-indexed patterns differ only in the two bits at position 10 (0b01 vs 0b11).
    private static final int LoadStorePostIndexedOp = 0b111_0_00_00_0 << 21 | 0b01 << 10;
    private static final int LoadStorePreIndexedOp = 0b111_0_00_00_0 << 21 | 0b11 << 10;

    private static final int LoadStoreUnscaledImmOffset = 12;
    private static final int LoadStoreScaledImmOffset = 10;
    private static final int LoadStoreScaledRegOffset = 12;
    private static final int LoadStoreIndexedImmOffset = 12;
    private static final int LoadStoreTransferSizeOffset = 30;
    private static final int LoadStoreFpFlagOffset = 26;
    private static final int LoadLiteralImmeOffset = 5;

    private static final int LoadStorePairOp = 0b101_0 << 26;
    // The two indexed pair opcodes are marked as currently unused.
    @SuppressWarnings("unused") private static final int LoadStorePairPostIndexOp = 0b101_0_001 << 23;
    @SuppressWarnings("unused") private static final int LoadStorePairPreIndexOp = 0b101_0_011 << 23;
    private static final int LoadStorePairImm7Offset = 15;

    private static final int LogicalShiftOp = 0x0A000000;

    private static final int ExceptionOp = 0xD4000000;
    private static final int SystemImmediateOffset = 5;

    // Marked as currently unused.
    @SuppressWarnings("unused") private static final int SimdImmediateOffset = 16;

    private static final int BarrierOp = 0xD503301F;
    private static final int BarrierKindOffset = 8;
 470 
    /**
     * Encoding for all instructions.
     * <p>
     * The value stored per mnemonic is only the instruction-specific part of the machine word; the
     * emit methods OR it with further bits. For example, the conditional branch is emitted as
     * {@code BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding}. As a
     * consequence several unrelated mnemonics share the same value (many are 0) and a value is
     * only meaningful together with the emit method that uses it.
     */
    public enum Instruction {
        // Conditional branch and compare-and-branch.
        BCOND(0x54000000),
        CBNZ(0x01000000),
        CBZ(0x00000000),

        // Unconditional branches, calls and return.
        B(0x00000000),
        BL(0x80000000),
        BR(0x001F0000),
        BLR(0x003F0000),
        RET(0x005F0000),

        // Loads. LDAR and LDAXR are written with seven hex digits, i.e. 0x08dffc00 and 0x085ffc00.
        LDR(0x00000000),
        LDRS(0x00800000),
        LDXR(0x081f7c00),
        LDAR(0x8dffc00),
        LDAXR(0x85ffc00),

        // Stores.
        STR(0x00000000),
        STXR(0x08007c00),
        STLR(0x089ffc00),
        STLXR(0x0800fc00),

        // Load/store pair: the two differ only in bit 22.
        LDP(0b1 << 22),
        STP(0b0 << 22),

        // PC-relative address computation.
        ADR(0x00000000),
        ADRP(0x80000000),

        // Add/subtract. The flag-setting variants reuse the base value plus AddSubSetFlag.
        ADD(0x00000000),
        ADDS(ADD.encoding | AddSubSetFlag),
        SUB(0x40000000),
        SUBS(SUB.encoding | AddSubSetFlag),

        // Logical operations. NOT is the modifier bit that is OR-ed into AND/ORR/EOR/ANDS to
        // derive BIC/ORN/EON/BICS.
        NOT(0x00200000),
        AND(0x00000000),
        BIC(AND.encoding | NOT.encoding),
        ORR(0x20000000),
        ORN(ORR.encoding | NOT.encoding),
        EOR(0x40000000),
        EON(EOR.encoding | NOT.encoding),
        ANDS(0x60000000),
        BICS(ANDS.encoding | NOT.encoding),

        // Shifts and rotate by a register amount.
        ASRV(0x00002800),
        RORV(0x00002C00),
        LSRV(0x00002400),
        LSLV(0x00002000),

        // Bit counting and bit/byte reversal.
        CLS(0x00001400),
        CLZ(0x00001000),
        RBIT(0x00000000),
        REVX(0x00000C00),
        REVW(0x00000800),

        // Move wide immediate.
        MOVN(0x00000000),
        MOVZ(0x40000000),
        MOVK(0x60000000),

        // Conditional select.
        CSEL(0x00000000),
        CSNEG(0x40000400),
        CSINC(0x00000400),

        // Bitfield move and extract.
        BFM(0x20000000),
        SBFM(0x00000000),
        UBFM(0x40000000),
        EXTR(0x13800000),

        // Multiply-add/subtract and division.
        MADD(0x00000000),
        MSUB(0x00008000),
        SDIV(0x00000C00),
        UDIV(0x00000800),

        // Floating-point moves.
        FMOV(0x00000000),
        FMOVCPU2FPU(0x00070000),
        FMOVFPU2CPU(0x00060000),

        // Floating-point precision conversions.
        FCVTDS(0x00028000),
        FCVTSD(0x00020000),

        // Floating-point <-> integer conversions. SCVTF has the same value as FCVTSD.
        FCVTZS(0x00180000),
        SCVTF(0x00020000),

        // Floating-point operations with one source.
        FABS(0x00008000),
        FSQRT(0x00018000),
        FNEG(0x00010000),

        FRINTZ(0x00058000),

        // Floating-point operations with two sources.
        FADD(0x00002000),
        FSUB(0x00003000),
        FMUL(0x00000000),
        FDIV(0x00001000),
        FMAX(0x00004000),
        FMIN(0x00005000),

        // Floating-point fused multiply-add/subtract.
        FMADD(0x00000000),
        FMSUB(0x00008000),

        // Floating-point compare and select.
        FCMP(0x00000000),
        FCMPZERO(0x00000008),
        FCCMP(0x1E200400),
        FCSEL(0x1E200C00),

        // SIMD instructions.
        INS(0x4e081c00),
        UMOV(0x4e083c00),

        // CNT is written with seven hex digits, i.e. 0x0e205800.
        CNT(0xe205800),
        USRA(0x6f001400),

        // Exception generation.
        HLT(0x00400000),
        BRK(0x00200000),

        // System instructions.
        CLREX(0xd5033f5f),
        HINT(0xD503201F),
        DMB(0x000000A0),

        BLR_NATIVE(0xc0000000);

        /** Instruction-specific bits; combined with other fields by the emit methods. */
        public final int encoding;

        Instruction(int encoding) {
            this.encoding = encoding;
        }

    }
 599 
 600     public enum ShiftType {
 601         LSL(0),
 602         LSR(1),
 603         ASR(2),
 604         ROR(3);
 605 
 606         public final int encoding;
 607 
 608         ShiftType(int encoding) {
 609             this.encoding = encoding;
 610         }
 611     }
 612 
 613     public enum ExtendType {
 614         UXTB(0),
 615         UXTH(1),
 616         UXTW(2),
 617         UXTX(3),
 618         SXTB(4),
 619         SXTH(5),
 620         SXTW(6),
 621         SXTX(7);
 622 
 623         public final int encoding;
 624 
 625         ExtendType(int encoding) {
 626             this.encoding = encoding;
 627         }
 628     }
 629 
 630     /**
 631      * Condition Flags for branches. See 4.3
 632      */
 633     public enum ConditionFlag {
 634         // Integer | Floating-point meanings
 635         /** Equal | Equal. */
 636         EQ(0x0),
 637 
 638         /** Not Equal | Not equal or unordered. */
 639         NE(0x1),
 640 
 641         /** Unsigned Higher or Same | Greater than, equal or unordered. */
 642         HS(0x2),
 643 
 644         /** Unsigned lower | less than. */
 645         LO(0x3),
 646 
 647         /** Minus (negative) | less than. */
 648         MI(0x4),
 649 
 650         /** Plus (positive or zero) | greater than, equal or unordered. */
 651         PL(0x5),
 652 
 653         /** Overflow set | unordered. */
 654         VS(0x6),
 655 
 656         /** Overflow clear | ordered. */
 657         VC(0x7),
 658 
 659         /** Unsigned higher | greater than or unordered. */
 660         HI(0x8),
 661 
 662         /** Unsigned lower or same | less than or equal. */
 663         LS(0x9),
 664 
 665         /** Signed greater than or equal | greater than or equal. */
 666         GE(0xA),
 667 
 668         /** Signed less than | less than or unordered. */
 669         LT(0xB),
 670 
 671         /** Signed greater than | greater than. */
 672         GT(0xC),
 673 
 674         /** Signed less than or equal | less than, equal or unordered. */
 675         LE(0xD),
 676 
 677         /** Always | always. */
 678         AL(0xE),
 679 
 680         /** Always | always (identical to AL, just to have valid 0b1111 encoding). */
 681         NV(0xF);
 682 
 683         public final int encoding;
 684 
 685         ConditionFlag(int encoding) {
 686             this.encoding = encoding;
 687         }
 688 
 689         /**
 690          * @return ConditionFlag specified by decoding.
 691          */
 692         public static ConditionFlag fromEncoding(int encoding) {
 693             return values()[encoding];
 694         }
 695 
 696         public ConditionFlag negate() {
 697             switch (this) {
 698                 case EQ:
 699                     return NE;
 700                 case NE:
 701                     return EQ;
 702                 case HS:
 703                     return LO;
 704                 case LO:
 705                     return HS;
 706                 case MI:
 707                     return PL;
 708                 case PL:
 709                     return MI;
 710                 case VS:
 711                     return VC;
 712                 case VC:
 713                     return VS;
 714                 case HI:
 715                     return LS;
 716                 case LS:
 717                     return HI;
 718                 case GE:
 719                     return LT;
 720                 case LT:
 721                     return GE;
 722                 case GT:
 723                     return LE;
 724                 case LE:
 725                     return GT;
 726                 case AL:
 727                 case NV:
 728                 default:
 729                     throw GraalError.shouldNotReachHere();
 730             }
 731         }
 732     }
 733 
    /**
     * Creates an assembler for the given target by forwarding it to the superclass constructor.
     *
     * @param target target description, passed on unchanged.
     */
    public AArch64Assembler(TargetDescription target) {
        super(target);
    }
 737 
 738     /* Conditional Branch (5.2.1) */
 739 
 740     /**
 741      * Branch conditionally.
 742      *
 743      * @param condition may not be null.
 744      * @param imm21 Signed 21-bit offset, has to be word aligned.
 745      */
 746     protected void b(ConditionFlag condition, int imm21) {
 747         b(condition, imm21, -1);
 748     }
 749 
 750     /**
 751      * Branch conditionally. Inserts instruction into code buffer at pos.
 752      *
 753      * @param condition may not be null.
 754      * @param imm21 Signed 21-bit offset, has to be word aligned.
 755      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 756      */
 757     protected void b(ConditionFlag condition, int imm21, int pos) {
 758         if (pos == -1) {
 759             emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding);
 760         } else {
 761             emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding, pos);
 762         }
 763     }
 764 
 765     /**
 766      * Compare register and branch if non-zero.
 767      *
 768      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 769      * @param size Instruction size in bits. Should be either 32 or 64.
 770      * @param imm21 Signed 21-bit offset, has to be word aligned.
 771      */
 772     protected void cbnz(int size, Register reg, int imm21) {
 773         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, -1);
 774     }
 775 
 776     /**
 777      * Compare register and branch if non-zero.
 778      *
 779      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 780      * @param size Instruction size in bits. Should be either 32 or 64.
 781      * @param imm21 Signed 21-bit offset, has to be word aligned.
 782      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 783      */
 784     protected void cbnz(int size, Register reg, int imm21, int pos) {
 785         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, pos);
 786     }
 787 
 788     /**
 789      * Compare and branch if zero.
 790      *
 791      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 792      * @param size Instruction size in bits. Should be either 32 or 64.
 793      * @param imm21 Signed 21-bit offset, has to be word aligned.
 794      */
 795     protected void cbz(int size, Register reg, int imm21) {
 796         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, -1);
 797     }
 798 
 799     /**
 800      * Compare register and branch if zero.
 801      *
 802      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 803      * @param size Instruction size in bits. Should be either 32 or 64.
 804      * @param imm21 Signed 21-bit offset, has to be word aligned.
 805      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 806      */
 807     protected void cbz(int size, Register reg, int imm21, int pos) {
 808         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, pos);
 809     }
 810 
 811     private void conditionalBranchInstruction(Register reg, int imm21, InstructionType type, Instruction instr, int pos) {
 812         assert reg.getRegisterCategory().equals(CPU);
 813         int instrEncoding = instr.encoding | CompareBranchOp;
 814         if (pos == -1) {
 815             emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg));
 816         } else {
 817             emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg), pos);
 818         }
 819     }
 820 
 821     private static int getConditionalBranchImm(int imm21) {
 822         assert NumUtil.isSignedNbit(21, imm21) && (imm21 & 0x3) == 0 : "Immediate has to be 21bit signed number and word aligned";
 823         int imm = (imm21 & NumUtil.getNbitNumberInt(21)) >> 2;
 824         return imm << ConditionalBranchImmOffset;
 825     }
 826 
 827     /* Unconditional Branch (immediate) (5.2.2) */
 828 
 829     /**
 830      * @param imm28 Signed 28-bit offset, has to be word aligned.
 831      */
 832     protected void b(int imm28) {
 833         unconditionalBranchImmInstruction(imm28, Instruction.B, -1);
 834     }
 835 
 836     /**
 837      *
 838      * @param imm28 Signed 28-bit offset, has to be word aligned.
 839      * @param pos Position where instruction is inserted into code buffer.
 840      */
 841     protected void b(int imm28, int pos) {
 842         unconditionalBranchImmInstruction(imm28, Instruction.B, pos);
 843     }
 844 
 845     /**
 846      * Branch and link return address to register X30.
 847      *
 848      * @param imm28 Signed 28-bit offset, has to be word aligned.
 849      */
 850     public void bl(int imm28) {
 851         unconditionalBranchImmInstruction(imm28, Instruction.BL, -1);
 852     }
 853 
 854     private void unconditionalBranchImmInstruction(int imm28, Instruction instr, int pos) {
 855         assert NumUtil.isSignedNbit(28, imm28) && (imm28 & 0x3) == 0 : "Immediate has to be 28bit signed number and word aligned";
 856         int imm = (imm28 & NumUtil.getNbitNumberInt(28)) >> 2;
 857         int instrEncoding = instr.encoding | UnconditionalBranchImmOp;
 858         if (pos == -1) {
 859             emitInt(instrEncoding | imm);
 860         } else {
 861             emitInt(instrEncoding | imm, pos);
 862         }
 863     }
 864 
 865     /* Unconditional Branch (register) (5.2.3) */
 866 
 867     /**
 868      * Branches to address in register and writes return address into register X30.
 869      *
 870      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 871      */
 872     public void blr(Register reg) {
 873         unconditionalBranchRegInstruction(BLR, reg);
 874     }
 875 
 876     /**
 877      * Branches to address in register.
 878      *
 879      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 880      */
 881     protected void br(Register reg) {
 882         unconditionalBranchRegInstruction(BR, reg);
 883     }
 884 
 885     /**
 886      * Return to address in register.
 887      *
 888      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 889      */
 890     public void ret(Register reg) {
 891         unconditionalBranchRegInstruction(RET, reg);
 892     }
 893 
 894     private void unconditionalBranchRegInstruction(Instruction instr, Register reg) {
 895         assert reg.getRegisterCategory().equals(CPU);
 896         assert !reg.equals(zr);
 897         assert !reg.equals(sp);
 898         emitInt(instr.encoding | UnconditionalBranchRegOp | rs1(reg));
 899     }
 900 
 901     /* Load-Store Single Register (5.3.1) */
 902 
 903     /**
 904      * Loads a srcSize value from address into rt zero-extending it.
 905      *
 906      * @param srcSize size of memory read in bits. Must be 8, 16, 32 or 64.
 907      * @param rt general purpose register. May not be null or stackpointer.
 908      * @param address all addressing modes allowed. May not be null.
 909      */
 910     public void ldr(int srcSize, Register rt, AArch64Address address) {
 911         assert rt.getRegisterCategory().equals(CPU);
 912         assert srcSize == 8 || srcSize == 16 || srcSize == 32 || srcSize == 64;
 913         int transferSize = NumUtil.log2Ceil(srcSize / 8);
 914         loadStoreInstruction(LDR, rt, address, General32, transferSize);
 915     }
 916 
 917     /**
 918      * Loads a srcSize value from address into rt sign-extending it.
 919      *
 920      * @param targetSize size of target register in bits. Must be 32 or 64.
 921      * @param srcSize size of memory read in bits. Must be 8, 16 or 32, but may not be equivalent to
 922      *            targetSize.
 923      * @param rt general purpose register. May not be null or stackpointer.
 924      * @param address all addressing modes allowed. May not be null.
 925      */
 926     protected void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
 927         assert rt.getRegisterCategory().equals(CPU);
 928         assert (srcSize == 8 || srcSize == 16 || srcSize == 32) && srcSize != targetSize;
 929         int transferSize = NumUtil.log2Ceil(srcSize / 8);
 930         loadStoreInstruction(LDRS, rt, address, generalFromSize(targetSize), transferSize);
 931     }
 932 
 933     /**
 934      * Stores register rt into memory pointed by address.
 935      *
 936      * @param destSize number of bits written to memory. Must be 8, 16, 32 or 64.
 937      * @param rt general purpose register. May not be null or stackpointer.
 938      * @param address all addressing modes allowed. May not be null.
 939      */
 940     public void str(int destSize, Register rt, AArch64Address address) {
 941         assert rt.getRegisterCategory().equals(CPU);
 942         assert destSize == 8 || destSize == 16 || destSize == 32 || destSize == 64;
 943         int transferSize = NumUtil.log2Ceil(destSize / 8);
 944         loadStoreInstruction(STR, rt, address, General64, transferSize);
 945     }
 946 
 947     private void loadStoreInstruction(Instruction instr, Register reg, AArch64Address address, InstructionType type, int log2TransferSize) {
 948         assert log2TransferSize >= 0 && log2TransferSize < 4;
 949         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
 950         int is32Bit = type.width == 32 ? 1 << ImmediateSizeOffset : 0;
 951         int isFloat = !type.isGeneral ? 1 << LoadStoreFpFlagOffset : 0;
 952         int memop = instr.encoding | transferSizeEncoding | is32Bit | isFloat | rt(reg);
 953         switch (address.getAddressingMode()) {
 954             case IMMEDIATE_SCALED:
 955                 emitInt(memop | LoadStoreScaledOp | address.getImmediate() << LoadStoreScaledImmOffset | rs1(address.getBase()));
 956                 break;
 957             case IMMEDIATE_UNSCALED:
 958                 emitInt(memop | LoadStoreUnscaledOp | address.getImmediate() << LoadStoreUnscaledImmOffset | rs1(address.getBase()));
 959                 break;
 960             case BASE_REGISTER_ONLY:
 961                 emitInt(memop | LoadStoreScaledOp | rs1(address.getBase()));
 962                 break;
 963             case EXTENDED_REGISTER_OFFSET:
 964             case REGISTER_OFFSET:
 965                 ExtendType extendType = address.getAddressingMode() == AddressingMode.EXTENDED_REGISTER_OFFSET ? address.getExtendType() : ExtendType.UXTX;
 966                 boolean shouldScale = address.isScaled() && log2TransferSize != 0;
 967                 emitInt(memop | LoadStoreRegisterOp | rs2(address.getOffset()) | extendType.encoding << ExtendTypeOffset | (shouldScale ? 1 : 0) << LoadStoreScaledRegOffset | rs1(address.getBase()));
 968                 break;
 969             case PC_LITERAL:
 970                 assert log2TransferSize >= 2 : "PC literal loads only works for load/stores of 32-bit and larger";
 971                 transferSizeEncoding = (log2TransferSize - 2) << LoadStoreTransferSizeOffset;
 972                 emitInt(transferSizeEncoding | isFloat | LoadLiteralOp | rd(reg) | address.getImmediate() << LoadLiteralImmeOffset);
 973                 break;
 974             case IMMEDIATE_POST_INDEXED:
 975                 emitInt(memop | LoadStorePostIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset);
 976                 break;
 977             case IMMEDIATE_PRE_INDEXED:
 978                 emitInt(memop | LoadStorePreIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset);
 979                 break;
 980             default:
 981                 throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode());
 982         }
 983     }
 984 
 985     /**
 986      * Load Pair of Registers calculates an address from a base register value and an immediate
 987      * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from
 988      * two registers.
 989      */
 990     public void ldp(int size, Register rt, Register rt2, AArch64Address address) {
 991         assert size == 32 || size == 64;
 992         loadStorePairInstruction(LDP, rt, rt2, address, generalFromSize(size));
 993     }
 994 
 995     /**
 996      * Store Pair of Registers calculates an address from a base register value and an immediate
 997      * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from
 998      * two registers.
 999      */
1000     public void stp(int size, Register rt, Register rt2, AArch64Address address) {
1001         assert size == 32 || size == 64;
1002         loadStorePairInstruction(STP, rt, rt2, address, generalFromSize(size));
1003     }
1004 
1005     private void loadStorePairInstruction(Instruction instr, Register rt, Register rt2, AArch64Address address, InstructionType type) {
1006         int scaledOffset = maskField(7, address.getImmediateRaw());  // LDP/STP use a 7-bit scaled
1007                                                                      // offset
1008         int memop = type.encoding | instr.encoding | scaledOffset << LoadStorePairImm7Offset | rt2(rt2) | rn(address.getBase()) | rt(rt);
1009         switch (address.getAddressingMode()) {
1010             case IMMEDIATE_SCALED:
1011                 emitInt(memop | LoadStorePairOp | (0b010 << 23));
1012                 break;
1013             case IMMEDIATE_POST_INDEXED:
1014                 emitInt(memop | LoadStorePairOp | (0b001 << 23));
1015                 break;
1016             case IMMEDIATE_PRE_INDEXED:
1017                 emitInt(memop | LoadStorePairOp | (0b011 << 23));
1018                 break;
1019             default:
1020                 throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode());
1021         }
1022     }
1023 
1024     /* Load-Store Exclusive (5.3.6) */
1025 
1026     /**
1027      * Load address exclusive. Natural alignment of address is required.
1028      *
1029      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1030      * @param rt general purpose register. May not be null or stackpointer.
1031      * @param rn general purpose register.
1032      */
1033     protected void ldxr(int size, Register rt, Register rn) {
1034         assert size == 8 || size == 16 || size == 32 || size == 64;
1035         int transferSize = NumUtil.log2Ceil(size / 8);
1036         exclusiveLoadInstruction(LDXR, rt, rn, transferSize);
1037     }
1038 
1039     /**
1040      * Store address exclusive. Natural alignment of address is required. rs and rt may not point to
1041      * the same register.
1042      *
1043      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1044      * @param rs general purpose register. Set to exclusive access status. 0 means success,
1045      *            everything else failure. May not be null, or stackpointer.
1046      * @param rt general purpose register. May not be null or stackpointer.
1047      * @param rn general purpose register.
1048      */
1049     protected void stxr(int size, Register rs, Register rt, Register rn) {
1050         assert size == 8 || size == 16 || size == 32 || size == 64;
1051         int transferSize = NumUtil.log2Ceil(size / 8);
1052         exclusiveStoreInstruction(STXR, rs, rt, rn, transferSize);
1053     }
1054 
1055     /* Load-Acquire/Store-Release (5.3.7) */
1056 
1057     /* non exclusive access */
1058     /**
1059      * Load acquire. Natural alignment of address is required.
1060      *
1061      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1062      * @param rt general purpose register. May not be null or stackpointer.
1063      * @param rn general purpose register.
1064      */
1065     protected void ldar(int size, Register rt, Register rn) {
1066         assert size == 8 || size == 16 || size == 32 || size == 64;
1067         int transferSize = NumUtil.log2Ceil(size / 8);
1068         exclusiveLoadInstruction(LDAR, rt, rn, transferSize);
1069     }
1070 
1071     /**
1072      * Store-release. Natural alignment of address is required.
1073      *
1074      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1075      * @param rt general purpose register. May not be null or stackpointer.
1076      * @param rn general purpose register.
1077      */
1078     protected void stlr(int size, Register rt, Register rn) {
1079         assert size == 8 || size == 16 || size == 32 || size == 64;
1080         int transferSize = NumUtil.log2Ceil(size / 8);
1081         // Hack: Passing the zero-register means it is ignored when building the encoding.
1082         exclusiveStoreInstruction(STLR, r0, rt, rn, transferSize);
1083     }
1084 
1085     /* exclusive access */
1086     /**
1087      * Load acquire exclusive. Natural alignment of address is required.
1088      *
1089      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1090      * @param rt general purpose register. May not be null or stackpointer.
1091      * @param rn general purpose register.
1092      */
1093     public void ldaxr(int size, Register rt, Register rn) {
1094         assert size == 8 || size == 16 || size == 32 || size == 64;
1095         int transferSize = NumUtil.log2Ceil(size / 8);
1096         exclusiveLoadInstruction(LDAXR, rt, rn, transferSize);
1097     }
1098 
1099     /**
1100      * Store-release exclusive. Natural alignment of address is required. rs and rt may not point to
1101      * the same register.
1102      *
1103      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1104      * @param rs general purpose register. Set to exclusive access status. 0 means success,
1105      *            everything else failure. May not be null, or stackpointer.
1106      * @param rt general purpose register. May not be null or stackpointer.
1107      * @param rn general purpose register.
1108      */
1109     public void stlxr(int size, Register rs, Register rt, Register rn) {
1110         assert size == 8 || size == 16 || size == 32 || size == 64;
1111         int transferSize = NumUtil.log2Ceil(size / 8);
1112         exclusiveStoreInstruction(STLXR, rs, rt, rn, transferSize);
1113     }
1114 
1115     private void exclusiveLoadInstruction(Instruction instr, Register reg, Register rn, int log2TransferSize) {
1116         assert log2TransferSize >= 0 && log2TransferSize < 4;
1117         assert reg.getRegisterCategory().equals(CPU);
1118         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1119         emitInt(transferSizeEncoding | instr.encoding | 1 << ImmediateSizeOffset | rn(rn) | rt(reg));
1120     }
1121 
1122     /**
1123      * Stores data from rt into address and sets rs to the returned exclusive access status.
1124      *
1125      * @param rs general purpose register into which the exclusive access status is written. May not
1126      *            be null.
1127      * @param rt general purpose register containing data to be written to memory at address. May
1128      *            not be null
1129      * @param rn general purpose register containing the address specifying where rt is written to.
1130      * @param log2TransferSize log2Ceil of memory transfer size.
1131      */
1132     private void exclusiveStoreInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize) {
1133         assert log2TransferSize >= 0 && log2TransferSize < 4;
1134         assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt);
1135         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1136         emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt));
1137     }
1138 
1139     /* PC-relative Address Calculation (5.4.4) */
1140 
1141     /**
     * Address of page: sign extends 21-bit offset, shifts it left by 12 and adds it to the value of
1143      * the PC with its bottom 12-bits cleared, writing the result to dst.
1144      *
1145      * @param dst general purpose register. May not be null, zero-register or stackpointer.
1146      * @param imm Signed 33-bit offset with lower 12bits clear.
1147      */
1148     // protected void adrp(Register dst, long imm) {
1149     // assert (imm & NumUtil.getNbitNumberInt(12)) == 0 : "Lower 12-bit of immediate must be zero.";
1150     // assert NumUtil.isSignedNbit(33, imm);
1151     // addressCalculationInstruction(dst, (int) (imm >>> 12), Instruction.ADRP);
1152     // }
1153 
1154     /**
1155      * Adds a 21-bit signed offset to the program counter and writes the result to dst.
1156      *
1157      * @param dst general purpose register. May not be null, zero-register or stackpointer.
1158      * @param imm21 Signed 21-bit offset.
1159      */
1160     public void adr(Register dst, int imm21) {
1161         emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21));
1162     }
1163 
1164     public void adr(Register dst, int imm21, int pos) {
1165         emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21), pos);
1166     }
1167 
1168     private static int getPcRelativeImmEncoding(int imm21) {
1169         assert NumUtil.isSignedNbit(21, imm21);
1170         int imm = imm21 & NumUtil.getNbitNumberInt(21);
1171         // higher 19 bit
1172         int immHi = (imm >> 2) << PcRelImmHiOffset;
1173         // lower 2 bit
1174         int immLo = (imm & 0x3) << PcRelImmLoOffset;
1175         return immHi | immLo;
1176     }
1177 
1178     /* Arithmetic (Immediate) (5.4.1) */
1179 
1180     /**
1181      * dst = src + aimm.
1182      *
1183      * @param size register size. Has to be 32 or 64.
1184      * @param dst general purpose register. May not be null or zero-register.
1185      * @param src general purpose register. May not be null or zero-register.
1186      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1187      *            the lower 12-bit cleared.
1188      */
1189     protected void add(int size, Register dst, Register src, int aimm) {
1190         assert !dst.equals(zr);
1191         assert !src.equals(zr);
1192         addSubImmInstruction(ADD, dst, src, aimm, generalFromSize(size));
1193     }
1194 
1195     /**
1196      * dst = src + aimm and sets condition flags.
1197      *
1198      * @param size register size. Has to be 32 or 64.
1199      * @param dst general purpose register. May not be null or stackpointer.
1200      * @param src general purpose register. May not be null or zero-register.
1201      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1202      *            the lower 12-bit cleared.
1203      */
1204     protected void adds(int size, Register dst, Register src, int aimm) {
1205         assert !dst.equals(sp);
1206         assert !src.equals(zr);
1207         addSubImmInstruction(ADDS, dst, src, aimm, generalFromSize(size));
1208     }
1209 
1210     /**
1211      * dst = src - aimm.
1212      *
1213      * @param size register size. Has to be 32 or 64.
1214      * @param dst general purpose register. May not be null or zero-register.
1215      * @param src general purpose register. May not be null or zero-register.
1216      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1217      *            the lower 12-bit cleared.
1218      */
1219     protected void sub(int size, Register dst, Register src, int aimm) {
1220         assert !dst.equals(zr);
1221         assert !src.equals(zr);
1222         addSubImmInstruction(SUB, dst, src, aimm, generalFromSize(size));
1223     }
1224 
1225     /**
1226      * dst = src - aimm and sets condition flags.
1227      *
1228      * @param size register size. Has to be 32 or 64.
1229      * @param dst general purpose register. May not be null or stackpointer.
1230      * @param src general purpose register. May not be null or zero-register.
1231      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1232      *            the lower 12-bit cleared.
1233      */
1234     protected void subs(int size, Register dst, Register src, int aimm) {
1235         assert !dst.equals(sp);
1236         assert !src.equals(zr);
1237         addSubImmInstruction(SUBS, dst, src, aimm, generalFromSize(size));
1238     }
1239 
1240     private void addSubImmInstruction(Instruction instr, Register dst, Register src, int aimm, InstructionType type) {
1241         emitInt(type.encoding | instr.encoding | AddSubImmOp | encodeAimm(aimm) | rd(dst) | rs1(src));
1242     }
1243 
1244     /**
1245      * Encodes arithmetic immediate.
1246      *
1247      * @param imm Immediate has to be either an unsigned 12-bit value or an unsigned 24-bit value
1248      *            with the lower 12 bits zero.
1249      * @return Representation of immediate for use with arithmetic instructions.
1250      */
1251     private static int encodeAimm(int imm) {
1252         assert isAimm(imm) : "Immediate has to be legal arithmetic immediate value " + imm;
1253         if (NumUtil.isUnsignedNbit(12, imm)) {
1254             return imm << ImmediateOffset;
1255         } else {
1256             // First 12-bit are zero, so shift immediate 12-bit and set flag to indicate
1257             // shifted immediate value.
1258             return (imm >>> 12 << ImmediateOffset) | AddSubShift12;
1259         }
1260     }
1261 
1262     /**
1263      * Checks whether immediate can be encoded as an arithmetic immediate.
1264      *
1265      * @param imm Immediate has to be either an unsigned 12bit value or un unsigned 24bit value with
1266      *            the lower 12 bits 0.
1267      * @return true if valid arithmetic immediate, false otherwise.
1268      */
1269     protected static boolean isAimm(int imm) {
1270         return NumUtil.isUnsignedNbit(12, imm) || NumUtil.isUnsignedNbit(12, imm >>> 12) && (imm & 0xfff) == 0;
1271     }
1272 
1273     /* Logical (immediate) (5.4.2) */
1274 
1275     /**
1276      * dst = src & bimm.
1277      *
1278      * @param size register size. Has to be 32 or 64.
1279      * @param dst general purpose register. May not be null or zero-register.
1280      * @param src general purpose register. May not be null or stack-pointer.
1281      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1282      */
1283     public void and(int size, Register dst, Register src, long bimm) {
1284         assert !dst.equals(zr);
1285         assert !src.equals(sp);
1286         logicalImmInstruction(AND, dst, src, bimm, generalFromSize(size));
1287     }
1288 
1289     /**
1290      * dst = src & bimm and sets condition flags.
1291      *
1292      * @param size register size. Has to be 32 or 64.
1293      * @param dst general purpose register. May not be null or stack-pointer.
1294      * @param src general purpose register. May not be null or stack-pointer.
1295      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1296      */
1297     public void ands(int size, Register dst, Register src, long bimm) {
1298         assert !dst.equals(sp);
1299         assert !src.equals(sp);
1300         logicalImmInstruction(ANDS, dst, src, bimm, generalFromSize(size));
1301     }
1302 
1303     /**
1304      * dst = src ^ bimm.
1305      *
1306      * @param size register size. Has to be 32 or 64.
1307      * @param dst general purpose register. May not be null or zero-register.
1308      * @param src general purpose register. May not be null or stack-pointer.
1309      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1310      */
1311     public void eor(int size, Register dst, Register src, long bimm) {
1312         assert !dst.equals(zr);
1313         assert !src.equals(sp);
1314         logicalImmInstruction(EOR, dst, src, bimm, generalFromSize(size));
1315     }
1316 
1317     /**
1318      * dst = src | bimm.
1319      *
1320      * @param size register size. Has to be 32 or 64.
1321      * @param dst general purpose register. May not be null or zero-register.
1322      * @param src general purpose register. May not be null or stack-pointer.
1323      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1324      */
1325     protected void orr(int size, Register dst, Register src, long bimm) {
1326         assert !dst.equals(zr);
1327         assert !src.equals(sp);
1328         logicalImmInstruction(ORR, dst, src, bimm, generalFromSize(size));
1329     }
1330 
1331     private void logicalImmInstruction(Instruction instr, Register dst, Register src, long bimm, InstructionType type) {
1332         // Mask higher bits off, since we always pass longs around even for the 32-bit instruction.
1333         long bimmValue;
1334         if (type == General32) {
1335             assert (bimm >> 32) == 0 || (bimm >> 32) == -1L : "Higher order bits for 32-bit instruction must either all be 0 or 1.";
1336             bimmValue = bimm & NumUtil.getNbitNumberLong(32);
1337         } else {
1338             bimmValue = bimm;
1339         }
1340         int immEncoding = LogicalImmediateTable.getLogicalImmEncoding(type == General64, bimmValue);
1341         emitInt(type.encoding | instr.encoding | LogicalImmOp | immEncoding | rd(dst) | rs1(src));
1342     }
1343 
1344     /* Move (wide immediate) (5.4.3) */
1345 
1346     /**
1347      * dst = uimm16 << shiftAmt.
1348      *
1349      * @param size register size. Has to be 32 or 64.
1350      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1351      * @param uimm16 16-bit unsigned immediate
1352      * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller
1353      *            than size.
1354      */
1355     protected void movz(int size, Register dst, int uimm16, int shiftAmt) {
1356         moveWideImmInstruction(MOVZ, dst, uimm16, shiftAmt, generalFromSize(size));
1357     }
1358 
1359     /**
1360      * dst = ~(uimm16 << shiftAmt).
1361      *
1362      * @param size register size. Has to be 32 or 64.
1363      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1364      * @param uimm16 16-bit unsigned immediate
1365      * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller
1366      *            than size.
1367      */
1368     protected void movn(int size, Register dst, int uimm16, int shiftAmt) {
1369         moveWideImmInstruction(MOVN, dst, uimm16, shiftAmt, generalFromSize(size));
1370     }
1371 
1372     /**
1373      * dst<pos+15:pos> = uimm16.
1374      *
1375      * @param size register size. Has to be 32 or 64.
1376      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1377      * @param uimm16 16-bit unsigned immediate
1378      * @param pos position into which uimm16 is inserted. Can be any multiple of 16 smaller than
1379      *            size.
1380      */
1381     protected void movk(int size, Register dst, int uimm16, int pos) {
1382         moveWideImmInstruction(MOVK, dst, uimm16, pos, generalFromSize(size));
1383     }
1384 
1385     private void moveWideImmInstruction(Instruction instr, Register dst, int uimm16, int shiftAmt, InstructionType type) {
1386         assert dst.getRegisterCategory().equals(CPU);
1387         assert NumUtil.isUnsignedNbit(16, uimm16) : "Immediate has to be unsigned 16bit";
1388         assert shiftAmt == 0 || shiftAmt == 16 || (type == InstructionType.General64 && (shiftAmt == 32 || shiftAmt == 48)) : "Invalid shift amount: " + shiftAmt;
1389         int shiftValue = shiftAmt >> 4;
1390         emitInt(type.encoding | instr.encoding | MoveWideImmOp | rd(dst) | uimm16 << MoveWideImmOffset | shiftValue << MoveWideShiftOffset);
1391     }
1392 
1393     /* Bitfield Operations (5.4.5) */
1394 
1395     /**
1396      * Bitfield move.
1397      *
1398      * @param size register size. Has to be 32 or 64.
1399      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1400      * @param src general purpose register. May not be null, stackpointer or zero-register.
1401      * @param r must be in the range 0 to size - 1
1402      * @param s must be in the range 0 to size - 1
1403      */
1404     protected void bfm(int size, Register dst, Register src, int r, int s) {
1405         bitfieldInstruction(BFM, dst, src, r, s, generalFromSize(size));
1406     }
1407 
1408     /**
1409      * Unsigned bitfield move.
1410      *
1411      * @param size register size. Has to be 32 or 64.
1412      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1413      * @param src general purpose register. May not be null, stackpointer or zero-register.
1414      * @param r must be in the range 0 to size - 1
1415      * @param s must be in the range 0 to size - 1
1416      */
1417     protected void ubfm(int size, Register dst, Register src, int r, int s) {
1418         bitfieldInstruction(UBFM, dst, src, r, s, generalFromSize(size));
1419     }
1420 
1421     /**
1422      * Signed bitfield move.
1423      *
1424      * @param size register size. Has to be 32 or 64.
1425      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1426      * @param src general purpose register. May not be null, stackpointer or zero-register.
1427      * @param r must be in the range 0 to size - 1
1428      * @param s must be in the range 0 to size - 1
1429      */
1430     protected void sbfm(int size, Register dst, Register src, int r, int s) {
1431         bitfieldInstruction(SBFM, dst, src, r, s, generalFromSize(size));
1432     }
1433 
1434     private void bitfieldInstruction(Instruction instr, Register dst, Register src, int r, int s, InstructionType type) {
1435         assert !dst.equals(sp) && !dst.equals(zr);
1436         assert !src.equals(sp) && !src.equals(zr);
1437         assert s >= 0 && s < type.width && r >= 0 && r < type.width;
1438         int sf = type == General64 ? 1 << ImmediateSizeOffset : 0;
1439         emitInt(type.encoding | instr.encoding | BitfieldImmOp | sf | r << ImmediateRotateOffset | s << ImmediateOffset | rd(dst) | rs1(src));
1440     }
1441 
1442     /* Extract (Immediate) (5.4.6) */
1443 
1444     /**
1445      * Extract. dst = src1:src2<lsb+31:lsb>
1446      *
1447      * @param size register size. Has to be 32 or 64.
1448      * @param dst general purpose register. May not be null or stackpointer.
1449      * @param src1 general purpose register. May not be null or stackpointer.
1450      * @param src2 general purpose register. May not be null or stackpointer.
1451      * @param lsb must be in range 0 to size - 1.
1452      */
1453     protected void extr(int size, Register dst, Register src1, Register src2, int lsb) {
1454         assert !dst.equals(sp);
1455         assert !src1.equals(sp);
1456         assert !src2.equals(sp);
1457         InstructionType type = generalFromSize(size);
1458         assert lsb >= 0 && lsb < type.width;
1459         int sf = type == General64 ? 1 << ImmediateSizeOffset : 0;
1460         emitInt(type.encoding | EXTR.encoding | sf | lsb << ImmediateOffset | rd(dst) | rs1(src1) | rs2(src2));
1461     }
1462 
1463     /* Arithmetic (shifted register) (5.5.1) */
1464 
1465     /**
1466      * dst = src1 + shiftType(src2, imm).
1467      *
1468      * @param size register size. Has to be 32 or 64.
1469      * @param dst general purpose register. May not be null or stackpointer.
1470      * @param src1 general purpose register. May not be null or stackpointer.
1471      * @param src2 general purpose register. May not be null or stackpointer.
1472      * @param shiftType any type but ROR.
1473      * @param imm must be in range 0 to size - 1.
1474      */
1475     protected void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1476         addSubShiftedInstruction(ADD, dst, src1, src2, shiftType, imm, generalFromSize(size));
1477     }
1478 
1479     /**
1480      * dst = src1 + shiftType(src2, imm) and sets condition flags.
1481      *
1482      * @param size register size. Has to be 32 or 64.
1483      * @param dst general purpose register. May not be null or stackpointer.
1484      * @param src1 general purpose register. May not be null or stackpointer.
1485      * @param src2 general purpose register. May not be null or stackpointer.
1486      * @param shiftType any type but ROR.
1487      * @param imm must be in range 0 to size - 1.
1488      */
1489     public void adds(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1490         addSubShiftedInstruction(ADDS, dst, src1, src2, shiftType, imm, generalFromSize(size));
1491     }
1492 
1493     /**
1494      * dst = src1 - shiftType(src2, imm).
1495      *
1496      * @param size register size. Has to be 32 or 64.
1497      * @param dst general purpose register. May not be null or stackpointer.
1498      * @param src1 general purpose register. May not be null or stackpointer.
1499      * @param src2 general purpose register. May not be null or stackpointer.
1500      * @param shiftType any type but ROR.
1501      * @param imm must be in range 0 to size - 1.
1502      */
1503     protected void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1504         addSubShiftedInstruction(SUB, dst, src1, src2, shiftType, imm, generalFromSize(size));
1505     }
1506 
1507     /**
1508      * dst = src1 - shiftType(src2, imm) and sets condition flags.
1509      *
1510      * @param size register size. Has to be 32 or 64.
1511      * @param dst general purpose register. May not be null or stackpointer.
1512      * @param src1 general purpose register. May not be null or stackpointer.
1513      * @param src2 general purpose register. May not be null or stackpointer.
1514      * @param shiftType any type but ROR.
1515      * @param imm must be in range 0 to size - 1.
1516      */
1517     public void subs(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1518         addSubShiftedInstruction(SUBS, dst, src1, src2, shiftType, imm, generalFromSize(size));
1519     }
1520 
1521     private void addSubShiftedInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int imm, InstructionType type) {
1522         assert shiftType != ShiftType.ROR;
1523         assert imm >= 0 && imm < type.width;
1524         emitInt(type.encoding | instr.encoding | AddSubShiftedOp | imm << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1525     }
1526 
1527     /* Arithmetic (extended register) (5.5.2) */
1528     /**
1529      * dst = src1 + extendType(src2) << imm.
1530      *
1531      * @param size register size. Has to be 32 or 64.
1532      * @param dst general purpose register. May not be null or zero-register..
1533      * @param src1 general purpose register. May not be null or zero-register.
1534      * @param src2 general purpose register. May not be null or stackpointer.
1535      * @param extendType defines how src2 is extended to the same size as src1.
1536      * @param shiftAmt must be in range 0 to 4.
1537      */
1538     public void add(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1539         assert !dst.equals(zr);
1540         assert !src1.equals(zr);
1541         assert !src2.equals(sp);
1542         addSubExtendedInstruction(ADD, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1543     }
1544 
1545     /**
1546      * dst = src1 + extendType(src2) << imm and sets condition flags.
1547      *
1548      * @param size register size. Has to be 32 or 64.
1549      * @param dst general purpose register. May not be null or stackpointer..
1550      * @param src1 general purpose register. May not be null or zero-register.
1551      * @param src2 general purpose register. May not be null or stackpointer.
1552      * @param extendType defines how src2 is extended to the same size as src1.
1553      * @param shiftAmt must be in range 0 to 4.
1554      */
1555     protected void adds(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1556         assert !dst.equals(sp);
1557         assert !src1.equals(zr);
1558         assert !src2.equals(sp);
1559         addSubExtendedInstruction(ADDS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1560     }
1561 
1562     /**
1563      * dst = src1 - extendType(src2) << imm.
1564      *
1565      * @param size register size. Has to be 32 or 64.
1566      * @param dst general purpose register. May not be null or zero-register..
1567      * @param src1 general purpose register. May not be null or zero-register.
1568      * @param src2 general purpose register. May not be null or stackpointer.
1569      * @param extendType defines how src2 is extended to the same size as src1.
1570      * @param shiftAmt must be in range 0 to 4.
1571      */
1572     protected void sub(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1573         assert !dst.equals(zr);
1574         assert !src1.equals(zr);
1575         assert !src2.equals(sp);
1576         addSubExtendedInstruction(SUB, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1577     }
1578 
1579     /**
1580      * dst = src1 - extendType(src2) << imm and sets flags.
1581      *
1582      * @param size register size. Has to be 32 or 64.
1583      * @param dst general purpose register. May not be null or stackpointer..
1584      * @param src1 general purpose register. May not be null or zero-register.
1585      * @param src2 general purpose register. May not be null or stackpointer.
1586      * @param extendType defines how src2 is extended to the same size as src1.
1587      * @param shiftAmt must be in range 0 to 4.
1588      */
1589     public void subs(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1590         assert !dst.equals(sp);
1591         assert !src1.equals(zr);
1592         assert !src2.equals(sp);
1593         addSubExtendedInstruction(SUBS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1594     }
1595 
1596     private void addSubExtendedInstruction(Instruction instr, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt, InstructionType type) {
1597         assert shiftAmt >= 0 && shiftAmt <= 4;
1598         emitInt(type.encoding | instr.encoding | AddSubExtendedOp | shiftAmt << ImmediateOffset | extendType.encoding << ExtendTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1599     }
1600 
1601     /* Logical (shifted register) (5.5.3) */
1602     /**
1603      * dst = src1 & shiftType(src2, imm).
1604      *
1605      * @param size register size. Has to be 32 or 64.
1606      * @param dst general purpose register. May not be null or stackpointer.
1607      * @param src1 general purpose register. May not be null or stackpointer.
1608      * @param src2 general purpose register. May not be null or stackpointer.
1609      * @param shiftType all types allowed, may not be null.
1610      * @param shiftAmt must be in range 0 to size - 1.
1611      */
1612     protected void and(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1613         logicalRegInstruction(AND, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1614     }
1615 
1616     /**
1617      * dst = src1 & shiftType(src2, imm) and sets condition flags.
1618      *
1619      * @param size register size. Has to be 32 or 64.
1620      * @param dst general purpose register. May not be null or stackpointer.
1621      * @param src1 general purpose register. May not be null or stackpointer.
1622      * @param src2 general purpose register. May not be null or stackpointer.
1623      * @param shiftType all types allowed, may not be null.
1624      * @param shiftAmt must be in range 0 to size - 1.
1625      */
1626     protected void ands(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1627         logicalRegInstruction(ANDS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1628     }
1629 
1630     /**
1631      * dst = src1 & ~(shiftType(src2, imm)).
1632      *
1633      * @param size register size. Has to be 32 or 64.
1634      * @param dst general purpose register. May not be null or stackpointer.
1635      * @param src1 general purpose register. May not be null or stackpointer.
1636      * @param src2 general purpose register. May not be null or stackpointer.
1637      * @param shiftType all types allowed, may not be null.
1638      * @param shiftAmt must be in range 0 to size - 1.
1639      */
1640     protected void bic(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1641         logicalRegInstruction(BIC, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1642     }
1643 
1644     /**
1645      * dst = src1 & ~(shiftType(src2, imm)) and sets condition flags.
1646      *
1647      * @param size register size. Has to be 32 or 64.
1648      * @param dst general purpose register. May not be null or stackpointer.
1649      * @param src1 general purpose register. May not be null or stackpointer.
1650      * @param src2 general purpose register. May not be null or stackpointer.
1651      * @param shiftType all types allowed, may not be null.
1652      * @param shiftAmt must be in range 0 to size - 1.
1653      */
1654     protected void bics(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1655         logicalRegInstruction(BICS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1656     }
1657 
1658     /**
1659      * dst = src1 ^ ~(shiftType(src2, imm)).
1660      *
1661      * @param size register size. Has to be 32 or 64.
1662      * @param dst general purpose register. May not be null or stackpointer.
1663      * @param src1 general purpose register. May not be null or stackpointer.
1664      * @param src2 general purpose register. May not be null or stackpointer.
1665      * @param shiftType all types allowed, may not be null.
1666      * @param shiftAmt must be in range 0 to size - 1.
1667      */
1668     protected void eon(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1669         logicalRegInstruction(EON, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1670     }
1671 
1672     /**
1673      * dst = src1 ^ shiftType(src2, imm).
1674      *
1675      * @param size register size. Has to be 32 or 64.
1676      * @param dst general purpose register. May not be null or stackpointer.
1677      * @param src1 general purpose register. May not be null or stackpointer.
1678      * @param src2 general purpose register. May not be null or stackpointer.
1679      * @param shiftType all types allowed, may not be null.
1680      * @param shiftAmt must be in range 0 to size - 1.
1681      */
1682     protected void eor(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1683         logicalRegInstruction(EOR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1684     }
1685 
1686     /**
1687      * dst = src1 | shiftType(src2, imm).
1688      *
1689      * @param size register size. Has to be 32 or 64.
1690      * @param dst general purpose register. May not be null or stackpointer.
1691      * @param src1 general purpose register. May not be null or stackpointer.
1692      * @param src2 general purpose register. May not be null or stackpointer.
1693      * @param shiftType all types allowed, may not be null.
1694      * @param shiftAmt must be in range 0 to size - 1.
1695      */
1696     protected void orr(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1697         logicalRegInstruction(ORR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1698     }
1699 
1700     /**
1701      * dst = src1 | ~(shiftType(src2, imm)).
1702      *
1703      * @param size register size. Has to be 32 or 64.
1704      * @param dst general purpose register. May not be null or stackpointer.
1705      * @param src1 general purpose register. May not be null or stackpointer.
1706      * @param src2 general purpose register. May not be null or stackpointer.
1707      * @param shiftType all types allowed, may not be null.
1708      * @param shiftAmt must be in range 0 to size - 1.
1709      */
1710     protected void orn(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1711         logicalRegInstruction(ORN, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1712     }
1713 
1714     private void logicalRegInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt, InstructionType type) {
1715         assert !dst.equals(sp);
1716         assert !src1.equals(sp);
1717         assert !src2.equals(sp);
1718         assert shiftAmt >= 0 && shiftAmt < type.width;
1719         emitInt(type.encoding | instr.encoding | LogicalShiftOp | shiftAmt << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1720     }
1721 
1722     /* Variable Shift (5.5.4) */
1723     /**
1724      * dst = src1 >> (src2 & log2(size)).
1725      *
1726      * @param size register size. Has to be 32 or 64.
1727      * @param dst general purpose register. May not be null or stackpointer.
1728      * @param src1 general purpose register. May not be null or stackpointer.
1729      * @param src2 general purpose register. May not be null or stackpointer.
1730      */
1731     protected void asr(int size, Register dst, Register src1, Register src2) {
1732         dataProcessing2SourceOp(ASRV, dst, src1, src2, generalFromSize(size));
1733     }
1734 
1735     /**
1736      * dst = src1 << (src2 & log2(size)).
1737      *
1738      * @param size register size. Has to be 32 or 64.
1739      * @param dst general purpose register. May not be null or stackpointer.
1740      * @param src1 general purpose register. May not be null or stackpointer.
1741      * @param src2 general purpose register. May not be null or stackpointer.
1742      */
1743     protected void lsl(int size, Register dst, Register src1, Register src2) {
1744         dataProcessing2SourceOp(LSLV, dst, src1, src2, generalFromSize(size));
1745     }
1746 
1747     /**
1748      * dst = src1 >>> (src2 & log2(size)).
1749      *
1750      * @param size register size. Has to be 32 or 64.
1751      * @param dst general purpose register. May not be null or stackpointer.
1752      * @param src1 general purpose register. May not be null or stackpointer.
1753      * @param src2 general purpose register. May not be null or stackpointer.
1754      */
1755     protected void lsr(int size, Register dst, Register src1, Register src2) {
1756         dataProcessing2SourceOp(LSRV, dst, src1, src2, generalFromSize(size));
1757     }
1758 
1759     /**
1760      * dst = rotateRight(src1, (src2 & log2(size))).
1761      *
1762      * @param size register size. Has to be 32 or 64.
1763      * @param dst general purpose register. May not be null or stackpointer.
1764      * @param src1 general purpose register. May not be null or stackpointer.
1765      * @param src2 general purpose register. May not be null or stackpointer.
1766      */
1767     protected void ror(int size, Register dst, Register src1, Register src2) {
1768         dataProcessing2SourceOp(RORV, dst, src1, src2, generalFromSize(size));
1769     }
1770 
1771     /* Bit Operations (5.5.5) */
1772 
1773     /**
1774      * Counts leading sign bits. Sets Wd to the number of consecutive bits following the topmost bit
1775      * in dst, that are the same as the topmost bit. The count does not include the topmost bit
1776      * itself , so the result will be in the range 0 to size-1 inclusive.
1777      *
1778      * @param size register size. Has to be 32 or 64.
1779      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1780      * @param src source register. May not be null, zero-register or the stackpointer.
1781      */
1782     protected void cls(int size, Register dst, Register src) {
1783         dataProcessing1SourceOp(CLS, dst, src, generalFromSize(size));
1784     }
1785 
1786     /**
1787      * Counts leading zeros.
1788      *
1789      * @param size register size. Has to be 32 or 64.
1790      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1791      * @param src source register. May not be null, zero-register or the stackpointer.
1792      */
1793     public void clz(int size, Register dst, Register src) {
1794         dataProcessing1SourceOp(CLZ, dst, src, generalFromSize(size));
1795     }
1796 
1797     /**
1798      * Reverses bits.
1799      *
1800      * @param size register size. Has to be 32 or 64.
1801      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1802      * @param src source register. May not be null, zero-register or the stackpointer.
1803      */
1804     public void rbit(int size, Register dst, Register src) {
1805         dataProcessing1SourceOp(RBIT, dst, src, generalFromSize(size));
1806     }
1807 
1808     /**
1809      * Reverses bytes.
1810      *
1811      * @param size register size. Has to be 32 or 64.
1812      * @param dst general purpose register. May not be null or the stackpointer.
1813      * @param src source register. May not be null or the stackpointer.
1814      */
1815     public void rev(int size, Register dst, Register src) {
1816         if (size == 64) {
1817             dataProcessing1SourceOp(REVX, dst, src, generalFromSize(size));
1818         } else {
1819             assert size == 32;
1820             dataProcessing1SourceOp(REVW, dst, src, generalFromSize(size));
1821         }
1822     }
1823 
1824     /* Conditional Data Processing (5.5.6) */
1825 
1826     /**
1827      * Conditional select. dst = src1 if condition else src2.
1828      *
1829      * @param size register size. Has to be 32 or 64.
1830      * @param dst general purpose register. May not be null or the stackpointer.
1831      * @param src1 general purpose register. May not be null or the stackpointer.
1832      * @param src2 general purpose register. May not be null or the stackpointer.
1833      * @param condition any condition flag. May not be null.
1834      */
1835     protected void csel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
1836         conditionalSelectInstruction(CSEL, dst, src1, src2, condition, generalFromSize(size));
1837     }
1838 
1839     /**
1840      * Conditional select negate. dst = src1 if condition else -src2.
1841      *
1842      * @param size register size. Has to be 32 or 64.
1843      * @param dst general purpose register. May not be null or the stackpointer.
1844      * @param src1 general purpose register. May not be null or the stackpointer.
1845      * @param src2 general purpose register. May not be null or the stackpointer.
1846      * @param condition any condition flag. May not be null.
1847      */
1848     protected void csneg(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
1849         conditionalSelectInstruction(CSNEG, dst, src1, src2, condition, generalFromSize(size));
1850     }
1851 
1852     /**
1853      * Conditional increase. dst = src1 if condition else src2 + 1.
1854      *
1855      * @param size register size. Has to be 32 or 64.
1856      * @param dst general purpose register. May not be null or the stackpointer.
1857      * @param src1 general purpose register. May not be null or the stackpointer.
1858      * @param src2 general purpose register. May not be null or the stackpointer.
1859      * @param condition any condition flag. May not be null.
1860      */
1861     protected void csinc(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
1862         conditionalSelectInstruction(CSINC, dst, src1, src2, condition, generalFromSize(size));
1863     }
1864 
1865     private void conditionalSelectInstruction(Instruction instr, Register dst, Register src1, Register src2, ConditionFlag condition, InstructionType type) {
1866         assert !dst.equals(sp);
1867         assert !src1.equals(sp);
1868         assert !src2.equals(sp);
1869         emitInt(type.encoding | instr.encoding | ConditionalSelectOp | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset);
1870     }
1871 
1872     /* Integer Multiply/Divide (5.6) */
1873 
1874     /**
1875      * dst = src1 * src2 + src3.
1876      *
1877      * @param size register size. Has to be 32 or 64.
1878      * @param dst general purpose register. May not be null or the stackpointer.
1879      * @param src1 general purpose register. May not be null or the stackpointer.
1880      * @param src2 general purpose register. May not be null or the stackpointer.
1881      * @param src3 general purpose register. May not be null or the stackpointer.
1882      */
1883     protected void madd(int size, Register dst, Register src1, Register src2, Register src3) {
1884         mulInstruction(MADD, dst, src1, src2, src3, generalFromSize(size));
1885     }
1886 
1887     /**
1888      * dst = src3 - src1 * src2.
1889      *
1890      * @param size register size. Has to be 32 or 64.
1891      * @param dst general purpose register. May not be null or the stackpointer.
1892      * @param src1 general purpose register. May not be null or the stackpointer.
1893      * @param src2 general purpose register. May not be null or the stackpointer.
1894      * @param src3 general purpose register. May not be null or the stackpointer.
1895      */
1896     protected void msub(int size, Register dst, Register src1, Register src2, Register src3) {
1897         mulInstruction(MSUB, dst, src1, src2, src3, generalFromSize(size));
1898     }
1899 
1900     /**
1901      * Signed multiply high. dst = (src1 * src2)[127:64]
1902      *
1903      * @param dst general purpose register. May not be null or the stackpointer.
1904      * @param src1 general purpose register. May not be null or the stackpointer.
1905      * @param src2 general purpose register. May not be null or the stackpointer.
1906      */
1907     protected void smulh(Register dst, Register src1, Register src2) {
1908         assert !dst.equals(sp);
1909         assert !src1.equals(sp);
1910         assert !src2.equals(sp);
1911         emitInt(0b10011011010 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
1912     }
1913 
1914     /**
1915      * unsigned multiply high. dst = (src1 * src2)[127:64]
1916      *
1917      * @param dst general purpose register. May not be null or the stackpointer.
1918      * @param src1 general purpose register. May not be null or the stackpointer.
1919      * @param src2 general purpose register. May not be null or the stackpointer.
1920      */
1921     protected void umulh(Register dst, Register src1, Register src2) {
1922         assert !dst.equals(sp);
1923         assert !src1.equals(sp);
1924         assert !src2.equals(sp);
1925         emitInt(0b10011011110 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
1926     }
1927 
1928     /**
1929      * unsigned multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2)
1930      *
1931      * @param dst general purpose register. May not be null or the stackpointer.
1932      * @param src1 general purpose register. May not be null or the stackpointer.
1933      * @param src2 general purpose register. May not be null or the stackpointer.
1934      * @param src3 general purpose register. May not be null or the stackpointer.
1935      */
1936     protected void umaddl(Register dst, Register src1, Register src2, Register src3) {
1937         assert !dst.equals(sp);
1938         assert !src1.equals(sp);
1939         assert !src2.equals(sp);
1940         assert !src3.equals(sp);
1941         emitInt(0b10011011101 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
1942     }
1943 
1944     /**
1945      * signed multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2)
1946      *
1947      * @param dst general purpose register. May not be null or the stackpointer.
1948      * @param src1 general purpose register. May not be null or the stackpointer.
1949      * @param src2 general purpose register. May not be null or the stackpointer.
1950      * @param src3 general purpose register. May not be null or the stackpointer.
1951      */
1952     public void smaddl(Register dst, Register src1, Register src2, Register src3) {
1953         assert !dst.equals(sp);
1954         assert !src1.equals(sp);
1955         assert !src2.equals(sp);
1956         assert !src3.equals(sp);
1957         emitInt(0b10011011001 << 21 | dst.encoding | rs1(src1) | rs2(src2) | rs3(src3));
1958     }
1959 
1960     private void mulInstruction(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) {
1961         assert !dst.equals(sp);
1962         assert !src1.equals(sp);
1963         assert !src2.equals(sp);
1964         assert !src3.equals(sp);
1965         emitInt(type.encoding | instr.encoding | MulOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3));
1966     }
1967 
1968     /**
1969      * Signed divide. dst = src1 / src2.
1970      *
1971      * @param size register size. Has to be 32 or 64.
1972      * @param dst general purpose register. May not be null or the stackpointer.
1973      * @param src1 general purpose register. May not be null or the stackpointer.
1974      * @param src2 general purpose register. May not be null or the stackpointer.
1975      */
1976     public void sdiv(int size, Register dst, Register src1, Register src2) {
1977         dataProcessing2SourceOp(SDIV, dst, src1, src2, generalFromSize(size));
1978     }
1979 
1980     /**
1981      * Unsigned divide. dst = src1 / src2.
1982      *
1983      * @param size register size. Has to be 32 or 64.
1984      * @param dst general purpose register. May not be null or the stackpointer.
1985      * @param src1 general purpose register. May not be null or the stackpointer.
1986      * @param src2 general purpose register. May not be null or the stackpointer.
1987      */
1988     public void udiv(int size, Register dst, Register src1, Register src2) {
1989         dataProcessing2SourceOp(UDIV, dst, src1, src2, generalFromSize(size));
1990     }
1991 
1992     private void dataProcessing1SourceOp(Instruction instr, Register dst, Register src, InstructionType type) {
1993         emitInt(type.encoding | instr.encoding | DataProcessing1SourceOp | rd(dst) | rs1(src));
1994     }
1995 
1996     private void dataProcessing2SourceOp(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) {
1997         assert !dst.equals(sp);
1998         assert !src1.equals(sp);
1999         assert !src2.equals(sp);
2000         emitInt(type.encoding | instr.encoding | DataProcessing2SourceOp | rd(dst) | rs1(src1) | rs2(src2));
2001     }
2002 
2003     /* Floating point operations */
2004 
2005     /* Load-Store Single FP register (5.7.1.1) */
2006     /**
2007      * Floating point load.
2008      *
2009      * @param size number of bits read from memory into rt. Must be 32 or 64.
2010      * @param rt floating point register. May not be null.
2011      * @param address all addressing modes allowed. May not be null.
2012      */
2013     public void fldr(int size, Register rt, AArch64Address address) {
2014         assert rt.getRegisterCategory().equals(SIMD);
2015         assert size == 32 || size == 64;
2016         int transferSize = NumUtil.log2Ceil(size / 8);
2017         loadStoreInstruction(LDR, rt, address, InstructionType.FP32, transferSize);
2018     }
2019 
2020     /**
2021      * Floating point store.
2022      *
2023      * @param size number of bits read from memory into rt. Must be 32 or 64.
2024      * @param rt floating point register. May not be null.
2025      * @param address all addressing modes allowed. May not be null.
2026      */
2027     public void fstr(int size, Register rt, AArch64Address address) {
2028         assert rt.getRegisterCategory().equals(SIMD);
2029         assert size == 32 || size == 64;
2030         int transferSize = NumUtil.log2Ceil(size / 8);
2031         loadStoreInstruction(STR, rt, address, InstructionType.FP64, transferSize);
2032     }
2033 
2034     /* Floating-point Move (register) (5.7.2) */
2035 
2036     /**
2037      * Floating point move.
2038      *
2039      * @param size register size. Has to be 32 or 64.
2040      * @param dst floating point register. May not be null.
2041      * @param src floating point register. May not be null.
2042      */
2043     protected void fmov(int size, Register dst, Register src) {
2044         fpDataProcessing1Source(FMOV, dst, src, floatFromSize(size));
2045     }
2046 
2047     /**
2048      * Move size bits from floating point register unchanged to general purpose register.
2049      *
2050      * @param size number of bits read from memory into rt. Must be 32 or 64.
2051      * @param dst general purpose register. May not be null, stack-pointer or zero-register
2052      * @param src floating point register. May not be null.
2053      */
2054     protected void fmovFpu2Cpu(int size, Register dst, Register src) {
2055         assert dst.getRegisterCategory().equals(CPU);
2056         assert src.getRegisterCategory().equals(SIMD);
2057         fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVFPU2CPU);
2058     }
2059 
2060     /**
2061      * Move size bits from general purpose register unchanged to floating point register.
2062      *
2063      * @param size register size. Has to be 32 or 64.
2064      * @param dst floating point register. May not be null.
2065      * @param src general purpose register. May not be null or stack-pointer.
2066      */
2067     protected void fmovCpu2Fpu(int size, Register dst, Register src) {
2068         assert dst.getRegisterCategory().equals(SIMD);
2069         assert src.getRegisterCategory().equals(CPU);
2070         fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVCPU2FPU);
2071     }
2072 
2073     private void fmovCpuFpuInstruction(Register dst, Register src, boolean is64bit, Instruction instr) {
2074         int sf = is64bit ? FP64.encoding | General64.encoding : FP32.encoding | General32.encoding;
2075         emitInt(sf | instr.encoding | FpConvertOp | rd(dst) | rs1(src));
2076     }
2077 
2078     /* Floating-point Move (immediate) (5.7.3) */
2079 
2080     /**
2081      * Move immediate into register.
2082      *
2083      * @param size register size. Has to be 32 or 64.
2084      * @param dst floating point register. May not be null.
2085      * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be
2086      *            loaded, i.e. (float) imm == imm must be true. In all cases
2087      *            {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true
2088      *            depending on size.
2089      */
2090     protected void fmov(int size, Register dst, double imm) {
2091         assert dst.getRegisterCategory().equals(SIMD);
2092         InstructionType type = floatFromSize(size);
2093         int immEncoding;
2094         if (type == FP64) {
2095             immEncoding = getDoubleImmediate(imm);
2096         } else {
2097             assert imm == (float) imm : "float mov must use an immediate that can be represented using a float.";
2098             immEncoding = getFloatImmediate((float) imm);
2099         }
2100         emitInt(type.encoding | FMOV.encoding | FpImmOp | immEncoding | rd(dst));
2101     }
2102 
2103     private static int getDoubleImmediate(double imm) {
2104         assert isDoubleImmediate(imm);
2105         // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
2106         // 0000.0000.0000.0000.0000.0000.0000.0000
2107         long repr = Double.doubleToRawLongBits(imm);
2108         int a = (int) (repr >>> 63) << 7;
2109         int b = (int) ((repr >>> 61) & 0x1) << 6;
2110         int cToH = (int) (repr >>> 48) & 0x3f;
2111         return (a | b | cToH) << FpImmOffset;
2112     }
2113 
2114     protected static boolean isDoubleImmediate(double imm) {
2115         // Valid values will have the form:
2116         // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
2117         // 0000.0000.0000.0000.0000.0000.0000.0000
2118         long bits = Double.doubleToRawLongBits(imm);
2119         // lower 48 bits are cleared
2120         if ((bits & NumUtil.getNbitNumberLong(48)) != 0) {
2121             return false;
2122         }
2123         // bits[61..54] are all set or all cleared.
2124         long pattern = (bits >> 54) & NumUtil.getNbitNumberLong(7);
2125         if (pattern != 0 && pattern != NumUtil.getNbitNumberLong(7)) {
2126             return false;
2127         }
2128         // bits[62] and bits[61] are opposites.
2129         return ((bits ^ (bits << 1)) & (1L << 62)) != 0;
2130     }
2131 
2132     private static int getFloatImmediate(float imm) {
2133         assert isFloatImmediate(imm);
2134         // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
2135         int repr = Float.floatToRawIntBits(imm);
2136         int a = (repr >>> 31) << 7;
2137         int b = ((repr >>> 29) & 0x1) << 6;
2138         int cToH = (repr >>> 19) & NumUtil.getNbitNumberInt(6);
2139         return (a | b | cToH) << FpImmOffset;
2140     }
2141 
2142     protected static boolean isFloatImmediate(float imm) {
2143         // Valid values will have the form:
2144         // aBbb.bbbc.defg.h000.0000.0000.0000.0000
2145         int bits = Float.floatToRawIntBits(imm);
2146         // lower 20 bits are cleared.
2147         if ((bits & NumUtil.getNbitNumberInt(19)) != 0) {
2148             return false;
2149         }
2150         // bits[29..25] are all set or all cleared
2151         int pattern = (bits >> 25) & NumUtil.getNbitNumberInt(5);
2152         if (pattern != 0 && pattern != NumUtil.getNbitNumberInt(5)) {
2153             return false;
2154         }
2155         // bits[29] and bits[30] have to be opposite
2156         return ((bits ^ (bits << 1)) & (1 << 30)) != 0;
2157     }
2158 
2159     /* Convert Floating-point Precision (5.7.4.1) */
2160     /* Converts float to double and vice-versa */
2161 
2162     /**
2163      * Convert float to double and vice-versa.
2164      *
2165      * @param srcSize size of source register in bits.
2166      * @param dst floating point register. May not be null.
2167      * @param src floating point register. May not be null.
2168      */
2169     public void fcvt(int srcSize, Register dst, Register src) {
2170         if (srcSize == 32) {
2171             fpDataProcessing1Source(FCVTDS, dst, src, floatFromSize(srcSize));
2172         } else {
2173             fpDataProcessing1Source(FCVTSD, dst, src, floatFromSize(srcSize));
2174         }
2175     }
2176 
2177     /* Convert to Integer (5.7.4.2) */
2178 
2179     /**
2180      * Convert floating point to integer. Rounds towards zero.
2181      *
2182      * @param targetSize size of integer register. 32 or 64.
2183      * @param srcSize size of floating point register. 32 or 64.
2184      * @param dst general purpose register. May not be null, the zero-register or the stackpointer.
2185      * @param src floating point register. May not be null.
2186      */
2187     public void fcvtzs(int targetSize, int srcSize, Register dst, Register src) {
2188         assert !dst.equals(zr) && !dst.equals(sp);
2189         assert src.getRegisterCategory().equals(SIMD);
2190         fcvtCpuFpuInstruction(FCVTZS, dst, src, generalFromSize(targetSize), floatFromSize(srcSize));
2191     }
2192 
2193     /* Convert from Integer (5.7.4.2) */
2194     /**
2195      * Converts integer to floating point. Uses rounding mode defined by FCPR.
2196      *
2197      * @param targetSize size of floating point register. 32 or 64.
2198      * @param srcSize size of integer register. 32 or 64.
2199      * @param dst floating point register. May not be null.
2200      * @param src general purpose register. May not be null or the stackpointer.
2201      */
2202     public void scvtf(int targetSize, int srcSize, Register dst, Register src) {
2203         assert dst.getRegisterCategory().equals(SIMD);
2204         assert !src.equals(sp);
2205         fcvtCpuFpuInstruction(SCVTF, dst, src, floatFromSize(targetSize), generalFromSize(srcSize));
2206     }
2207 
2208     private void fcvtCpuFpuInstruction(Instruction instr, Register dst, Register src, InstructionType type1, InstructionType type2) {
2209         emitInt(type1.encoding | type2.encoding | instr.encoding | FpConvertOp | rd(dst) | rs1(src));
2210     }
2211 
2212     /* Floating-point Round to Integral (5.7.5) */
2213 
2214     /**
2215      * Rounds floating-point to integral. Rounds towards zero.
2216      *
2217      * @param size register size.
2218      * @param dst floating point register. May not be null.
2219      * @param src floating point register. May not be null.
2220      */
2221     protected void frintz(int size, Register dst, Register src) {
2222         fpDataProcessing1Source(FRINTZ, dst, src, floatFromSize(size));
2223     }
2224 
2225     /* Floating-point Arithmetic (1 source) (5.7.6) */
2226 
2227     /**
2228      * dst = |src|.
2229      *
2230      * @param size register size.
2231      * @param dst floating point register. May not be null.
2232      * @param src floating point register. May not be null.
2233      */
2234     public void fabs(int size, Register dst, Register src) {
2235         fpDataProcessing1Source(FABS, dst, src, floatFromSize(size));
2236     }
2237 
2238     /**
2239      * dst = -neg.
2240      *
2241      * @param size register size.
2242      * @param dst floating point register. May not be null.
2243      * @param src floating point register. May not be null.
2244      */
2245     public void fneg(int size, Register dst, Register src) {
2246         fpDataProcessing1Source(FNEG, dst, src, floatFromSize(size));
2247     }
2248 
2249     /**
2250      * dst = Sqrt(src).
2251      *
2252      * @param size register size.
2253      * @param dst floating point register. May not be null.
2254      * @param src floating point register. May not be null.
2255      */
2256     public void fsqrt(int size, Register dst, Register src) {
2257         fpDataProcessing1Source(FSQRT, dst, src, floatFromSize(size));
2258     }
2259 
2260     private void fpDataProcessing1Source(Instruction instr, Register dst, Register src, InstructionType type) {
2261         assert dst.getRegisterCategory().equals(SIMD);
2262         assert src.getRegisterCategory().equals(SIMD);
2263         emitInt(type.encoding | instr.encoding | Fp1SourceOp | rd(dst) | rs1(src));
2264     }
2265 
2266     /* Floating-point Arithmetic (2 source) (5.7.7) */
2267 
2268     /**
2269      * dst = src1 + src2.
2270      *
2271      * @param size register size.
2272      * @param dst floating point register. May not be null.
2273      * @param src1 floating point register. May not be null.
2274      * @param src2 floating point register. May not be null.
2275      */
2276     public void fadd(int size, Register dst, Register src1, Register src2) {
2277         fpDataProcessing2Source(FADD, dst, src1, src2, floatFromSize(size));
2278     }
2279 
2280     /**
2281      * dst = src1 - src2.
2282      *
2283      * @param size register size.
2284      * @param dst floating point register. May not be null.
2285      * @param src1 floating point register. May not be null.
2286      * @param src2 floating point register. May not be null.
2287      */
2288     public void fsub(int size, Register dst, Register src1, Register src2) {
2289         fpDataProcessing2Source(FSUB, dst, src1, src2, floatFromSize(size));
2290     }
2291 
2292     /**
2293      * dst = src1 * src2.
2294      *
2295      * @param size register size.
2296      * @param dst floating point register. May not be null.
2297      * @param src1 floating point register. May not be null.
2298      * @param src2 floating point register. May not be null.
2299      */
2300     public void fmul(int size, Register dst, Register src1, Register src2) {
2301         fpDataProcessing2Source(FMUL, dst, src1, src2, floatFromSize(size));
2302     }
2303 
2304     /**
2305      * dst = src1 / src2.
2306      *
2307      * @param size register size.
2308      * @param dst floating point register. May not be null.
2309      * @param src1 floating point register. May not be null.
2310      * @param src2 floating point register. May not be null.
2311      */
2312     public void fdiv(int size, Register dst, Register src1, Register src2) {
2313         fpDataProcessing2Source(FDIV, dst, src1, src2, floatFromSize(size));
2314     }
2315 
2316     private void fpDataProcessing2Source(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) {
2317         assert dst.getRegisterCategory().equals(SIMD);
2318         assert src1.getRegisterCategory().equals(SIMD);
2319         assert src2.getRegisterCategory().equals(SIMD);
2320         emitInt(type.encoding | instr.encoding | Fp2SourceOp | rd(dst) | rs1(src1) | rs2(src2));
2321     }
2322 
2323     /* Floating-point Multiply-Add (5.7.9) */
2324 
2325     /**
2326      * dst = src1 * src2 + src3.
2327      *
2328      * @param size register size.
2329      * @param dst floating point register. May not be null.
2330      * @param src1 floating point register. May not be null.
2331      * @param src2 floating point register. May not be null.
2332      * @param src3 floating point register. May not be null.
2333      */
2334     protected void fmadd(int size, Register dst, Register src1, Register src2, Register src3) {
2335         fpDataProcessing3Source(FMADD, dst, src1, src2, src3, floatFromSize(size));
2336     }
2337 
2338     /**
2339      * dst = src3 - src1 * src2.
2340      *
2341      * @param size register size.
2342      * @param dst floating point register. May not be null.
2343      * @param src1 floating point register. May not be null.
2344      * @param src2 floating point register. May not be null.
2345      * @param src3 floating point register. May not be null.
2346      */
2347     protected void fmsub(int size, Register dst, Register src1, Register src2, Register src3) {
2348         fpDataProcessing3Source(FMSUB, dst, src1, src2, src3, floatFromSize(size));
2349     }
2350 
2351     private void fpDataProcessing3Source(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) {
2352         assert dst.getRegisterCategory().equals(SIMD);
2353         assert src1.getRegisterCategory().equals(SIMD);
2354         assert src2.getRegisterCategory().equals(SIMD);
2355         assert src3.getRegisterCategory().equals(SIMD);
2356         emitInt(type.encoding | instr.encoding | Fp3SourceOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3));
2357     }
2358 
2359     /* Floating-point Comparison (5.7.10) */
2360 
2361     /**
2362      * Compares src1 to src2.
2363      *
2364      * @param size register size.
2365      * @param src1 floating point register. May not be null.
2366      * @param src2 floating point register. May not be null.
2367      */
2368     public void fcmp(int size, Register src1, Register src2) {
2369         assert src1.getRegisterCategory().equals(SIMD);
2370         assert src2.getRegisterCategory().equals(SIMD);
2371         InstructionType type = floatFromSize(size);
2372         emitInt(type.encoding | FCMP.encoding | FpCmpOp | rs1(src1) | rs2(src2));
2373     }
2374 
2375     /**
2376      * Conditional compare. NZCV = fcmp(src1, src2) if condition else uimm4.
2377      *
2378      * @param size register size.
2379      * @param src1 floating point register. May not be null.
2380      * @param src2 floating point register. May not be null.
2381      * @param uimm4 condition flags that are used if condition is false.
2382      * @param condition every condition allowed. May not be null.
2383      */
2384     public void fccmp(int size, Register src1, Register src2, int uimm4, ConditionFlag condition) {
2385         assert NumUtil.isUnsignedNbit(4, uimm4);
2386         assert src1.getRegisterCategory().equals(SIMD);
2387         assert src2.getRegisterCategory().equals(SIMD);
2388         InstructionType type = floatFromSize(size);
2389         emitInt(type.encoding | FCCMP.encoding | uimm4 | condition.encoding << ConditionalConditionOffset | rs1(src1) | rs2(src2));
2390     }
2391 
2392     /**
2393      * Compare register to 0.0 .
2394      *
2395      * @param size register size.
2396      * @param src floating point register. May not be null.
2397      */
2398     public void fcmpZero(int size, Register src) {
2399         assert src.getRegisterCategory().equals(SIMD);
2400         InstructionType type = floatFromSize(size);
2401         emitInt(type.encoding | FCMPZERO.encoding | FpCmpOp | rs1(src));
2402     }
2403 
2404     /* Floating-point Conditional Select (5.7.11) */
2405 
2406     /**
2407      * Conditional select. dst = src1 if condition else src2.
2408      *
2409      * @param size register size.
2410      * @param dst floating point register. May not be null.
2411      * @param src1 floating point register. May not be null.
2412      * @param src2 floating point register. May not be null.
2413      * @param condition every condition allowed. May not be null.
2414      */
2415     protected void fcsel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2416         assert dst.getRegisterCategory().equals(SIMD);
2417         assert src1.getRegisterCategory().equals(SIMD);
2418         assert src2.getRegisterCategory().equals(SIMD);
2419         InstructionType type = floatFromSize(size);
2420         emitInt(type.encoding | FCSEL.encoding | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset);
2421     }
2422 
2423     /* Debug exceptions (5.9.1.2) */
2424 
2425     /**
2426      * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
2427      * UNALLOCATED instruction.
2428      *
2429      * @param uimm16 Arbitrary 16-bit unsigned payload.
2430      */
2431     protected void hlt(int uimm16) {
2432         exceptionInstruction(HLT, uimm16);
2433     }
2434 
2435     /**
2436      * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
2437      * exception level.
2438      *
2439      * @param uimm16 Arbitrary 16-bit unsigned payload.
2440      */
2441     protected void brk(int uimm16) {
2442         exceptionInstruction(BRK, uimm16);
2443     }
2444 
2445     private void exceptionInstruction(Instruction instr, int uimm16) {
2446         assert NumUtil.isUnsignedNbit(16, uimm16);
2447         emitInt(instr.encoding | ExceptionOp | uimm16 << SystemImmediateOffset);
2448     }
2449 
2450     /* Architectural hints (5.9.4) */
2451     public enum SystemHint {
2452         NOP(0x0),
2453         YIELD(0x1),
2454         WFE(0x2),
2455         WFI(0x3),
2456         SEV(0x4),
2457         SEVL(0x5);
2458 
2459         private final int encoding;
2460 
2461         SystemHint(int encoding) {
2462             this.encoding = encoding;
2463         }
2464     }
2465 
2466     /**
2467      * Architectural hints.
2468      *
2469      * @param hint Can be any of the defined hints. May not be null.
2470      */
2471     protected void hint(SystemHint hint) {
2472         emitInt(HINT.encoding | hint.encoding << SystemImmediateOffset);
2473     }
2474 
2475     /**
2476      * Clear Exclusive: clears the local record of the executing processor that an address has had a
2477      * request for an exclusive access.
2478      */
2479     protected void clrex() {
2480         emitInt(CLREX.encoding);
2481     }
2482 
2483     /**
2484      * Possible barrier definitions for Aarch64. LOAD_LOAD and LOAD_STORE map to the same underlying
2485      * barrier.
2486      *
2487      * We only need synchronization across the inner shareable domain (see B2-90 in the Reference
2488      * documentation).
2489      */
2490     public enum BarrierKind {
2491         LOAD_LOAD(0x9, "ISHLD"),
2492         LOAD_STORE(0x9, "ISHLD"),
2493         STORE_STORE(0xA, "ISHST"),
2494         ANY_ANY(0xB, "ISH");
2495 
2496         public final int encoding;
2497         public final String optionName;
2498 
2499         BarrierKind(int encoding, String optionName) {
2500             this.encoding = encoding;
2501             this.optionName = optionName;
2502         }
2503     }
2504 
2505     /**
2506      * Data Memory Barrier.
2507      *
2508      * @param barrierKind barrier that is issued. May not be null.
2509      */
2510     public void dmb(BarrierKind barrierKind) {
2511         emitInt(DMB.encoding | BarrierOp | barrierKind.encoding << BarrierKindOffset);
2512     }
2513 
2514 }