1 /*
   2  * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2018, Red Hat Inc. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 
  26 package org.graalvm.compiler.asm.aarch64;
  27 
  28 import static jdk.vm.ci.aarch64.AArch64.cpuRegisters;
  29 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADD;
  30 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADDS;
  31 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADR;
  32 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADRP;
  33 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.AND;
  34 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ANDS;
  35 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ASRV;
  36 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BFM;
  37 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BIC;
  38 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BICS;
  39 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BLR;
  40 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BR;
  41 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BRK;
  42 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CAS;
  43 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLREX;
  44 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLS;
  45 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLZ;
  46 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSEL;
  47 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSINC;
  48 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSNEG;
  49 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.DMB;
  50 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EON;
  51 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EOR;
  52 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EXTR;
  53 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FABS;
  54 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FADD;
  55 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCCMP;
  56 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCMP;
  57 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCMPZERO;
  58 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCSEL;
  59 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTDS;
  60 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTSD;
  61 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTZS;
  62 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FDIV;
  63 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMADD;
  64 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMOV;
  65 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMSUB;
  66 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMUL;
  67 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FNEG;
  68 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FRINTM;
  69 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FRINTN;
  70 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FRINTP;
  71 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FRINTZ;
  72 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FSQRT;
  73 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FSUB;
  74 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.HINT;
  75 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.HLT;
  76 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDADD;
  77 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDAR;
  78 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDAXR;
  79 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDP;
  80 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDR;
  81 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDRS;
  82 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDXR;
  83 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LSLV;
  84 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LSRV;
  85 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MADD;
  86 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVK;
  87 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVN;
  88 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVZ;
  89 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MSUB;
  90 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ORN;
  91 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ORR;
  92 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RBIT;
  93 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RET;
  94 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.REVW;
  95 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.REVX;
  96 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RORV;
  97 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SBFM;
  98 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SCVTF;
  99 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SDIV;
 100 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STLR;
 101 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STLXR;
 102 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STP;
 103 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STR;
 104 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STXR;
 105 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUB;
 106 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUBS;
 107 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SWP;
 108 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBZ;
 109 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBNZ;
 110 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UBFM;
 111 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UDIV;
 112 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP32;
 113 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP64;
 114 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General32;
 115 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General64;
 116 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.floatFromSize;
 117 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.generalFromSize;
 118 import static jdk.vm.ci.aarch64.AArch64.CPU;
 119 import static jdk.vm.ci.aarch64.AArch64.SIMD;
 120 import static jdk.vm.ci.aarch64.AArch64.r0;
 121 import static jdk.vm.ci.aarch64.AArch64.sp;
 122 import static jdk.vm.ci.aarch64.AArch64.zr;
 123 
 124 import java.util.Arrays;
 125 
 126 import org.graalvm.compiler.asm.Assembler;
 127 import org.graalvm.compiler.core.common.NumUtil;
 128 import org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode;
 129 import org.graalvm.compiler.debug.GraalError;
 130 
 131 import jdk.vm.ci.aarch64.AArch64;
 132 import jdk.vm.ci.aarch64.AArch64.CPUFeature;
 133 import jdk.vm.ci.aarch64.AArch64.Flag;
 134 import jdk.vm.ci.code.Register;
 135 import jdk.vm.ci.code.TargetDescription;
 136 
 137 public abstract class AArch64Assembler extends Assembler {
 138 
 139     public static class LogicalImmediateTable {
 140 
 141         private static final Immediate[] IMMEDIATE_TABLE = buildImmediateTable();
 142 
 143         private static final int ImmediateOffset = 10;
 144         private static final int ImmediateRotateOffset = 16;
 145         private static final int ImmediateSizeOffset = 22;
 146 
 147         /**
 148          * Specifies whether immediate can be represented in all cases (YES), as a 64bit instruction
 149          * (SIXTY_FOUR_BIT_ONLY) or not at all (NO).
 150          */
 151         enum Representable {
 152             YES,
 153             SIXTY_FOUR_BIT_ONLY,
 154             NO
 155         }
 156 
 157         /**
 158          * Tests whether an immediate can be encoded for logical instructions.
 159          *
 160          * @param is64bit if true immediate is considered a 64-bit pattern. If false we may use a
 161          *            64-bit instruction to load the 32-bit pattern into a register.
 162          * @return enum specifying whether immediate can be used for 32- and 64-bit logical
 163          *         instructions ({@code #Representable.YES}), for 64-bit instructions only (
 164          *         {@link Representable#SIXTY_FOUR_BIT_ONLY}) or not at all (
 165          *         {@link Representable#NO}).
 166          */
 167         public static Representable isRepresentable(boolean is64bit, long immediate) {
 168             int pos = getLogicalImmTablePos(is64bit, immediate);
 169             if (pos < 0) {
 170                 // if 32bit instruction we can try again as 64bit immediate which may succeed.
 171                 // i.e. 0xffffffff fails as a 32bit immediate but works as 64bit one.
 172                 if (!is64bit) {
 173                     assert NumUtil.isUnsignedNbit(32, immediate);
 174                     pos = getLogicalImmTablePos(true, immediate);
 175                     return pos >= 0 ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.NO;
 176                 }
 177                 return Representable.NO;
 178             }
 179             Immediate imm = IMMEDIATE_TABLE[pos];
 180             return imm.only64bit() ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.YES;
 181         }
 182 
 183         public static Representable isRepresentable(int immediate) {
 184             return isRepresentable(false, immediate & 0xFFFF_FFFFL);
 185         }
 186 
 187         public static int getLogicalImmEncoding(boolean is64bit, long value) {
 188             int pos = getLogicalImmTablePos(is64bit, value);
 189             assert pos >= 0 : "Value cannot be represented as logical immediate: " + value + ", is64bit=" + is64bit;
 190             Immediate imm = IMMEDIATE_TABLE[pos];
 191             assert is64bit || !imm.only64bit() : "Immediate can only be represented for 64bit, but 32bit instruction specified";
 192             return IMMEDIATE_TABLE[pos].encoding;
 193         }
 194 
 195         /**
 196          * @param is64bit if true also allow 64-bit only encodings to be returned.
 197          * @return If positive the return value is the position into the IMMEDIATE_TABLE for the
 198          *         given immediate, if negative the immediate cannot be encoded.
 199          */
 200         private static int getLogicalImmTablePos(boolean is64bit, long value) {
 201             Immediate imm;
 202             if (!is64bit) {
 203                 // 32bit instructions can only have 32bit immediates.
 204                 if (!NumUtil.isUnsignedNbit(32, value)) {
 205                     return -1;
 206                 }
 207                 // If we have a 32bit instruction (and therefore immediate) we have to duplicate it
 208                 // across 64bit to find it in the table.
 209                 imm = new Immediate(value << 32 | value);
 210             } else {
 211                 imm = new Immediate(value);
 212             }
 213             int pos = Arrays.binarySearch(IMMEDIATE_TABLE, imm);
 214             if (pos < 0) {
 215                 return -1;
 216             }
 217             if (!is64bit && IMMEDIATE_TABLE[pos].only64bit()) {
 218                 return -1;
 219             }
 220             return pos;
 221         }
 222 
 223         /**
 224          * To quote 5.4.2: [..] an immediate is a 32 or 64 bit pattern viewed as a vector of
 225          * identical elements of size e = 2, 4, 8, 16, 32 or (in the case of bimm64) 64 bits. Each
 226          * element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by
 227          * 0 to e-1 bits. It is encoded in the following: 10-16: rotation amount (6bit) starting
 228          * from 1s in the LSB (i.e. 0111->1011->1101->1110) 16-22: This stores a combination of the
 229          * number of set bits and the pattern size. The pattern size is encoded as follows (x is
 230          * used to store the number of 1 bits - 1) e pattern 2 1111xx 4 1110xx 8 110xxx 16 10xxxx 32
 231          * 0xxxxx 64 xxxxxx 22: if set we have an instruction with 64bit pattern?
 232          */
 233         private static final class Immediate implements Comparable<Immediate> {
 234             public final long imm;
 235             public final int encoding;
 236 
 237             Immediate(long imm, boolean is64, int s, int r) {
 238                 this.imm = imm;
 239                 this.encoding = computeEncoding(is64, s, r);
 240             }
 241 
 242             // Used to be able to binary search for an immediate in the table.
 243             Immediate(long imm) {
 244                 this(imm, false, 0, 0);
 245             }
 246 
 247             /**
 248              * Returns true if this pattern is only representable as 64bit.
 249              */
 250             public boolean only64bit() {
 251                 return (encoding & (1 << ImmediateSizeOffset)) != 0;
 252             }
 253 
 254             private static int computeEncoding(boolean is64, int s, int r) {
 255                 int sf = is64 ? 1 : 0;
 256                 return sf << ImmediateSizeOffset | r << ImmediateRotateOffset | s << ImmediateOffset;
 257             }
 258 
 259             @Override
 260             public int compareTo(Immediate o) {
 261                 return Long.compare(imm, o.imm);
 262             }
 263         }
 264 
 265         private static Immediate[] buildImmediateTable() {
 266             final int nrImmediates = 5334;
 267             final Immediate[] table = new Immediate[nrImmediates];
 268             int nrImms = 0;
 269             for (int logE = 1; logE <= 6; logE++) {
 270                 int e = 1 << logE;
 271                 long mask = NumUtil.getNbitNumberLong(e);
 272                 for (int nrOnes = 1; nrOnes < e; nrOnes++) {
 273                     long val = (1L << nrOnes) - 1;
 274                     // r specifies how much we rotate the value
 275                     for (int r = 0; r < e; r++) {
 276                         long immediate = (val >>> r | val << (e - r)) & mask;
 277                         // Duplicate pattern to fill whole 64bit range.
 278                         switch (logE) {
 279                             case 1:
 280                                 immediate |= immediate << 2;
 281                                 immediate |= immediate << 4;
 282                                 immediate |= immediate << 8;
 283                                 immediate |= immediate << 16;
 284                                 immediate |= immediate << 32;
 285                                 break;
 286                             case 2:
 287                                 immediate |= immediate << 4;
 288                                 immediate |= immediate << 8;
 289                                 immediate |= immediate << 16;
 290                                 immediate |= immediate << 32;
 291                                 break;
 292                             case 3:
 293                                 immediate |= immediate << 8;
 294                                 immediate |= immediate << 16;
 295                                 immediate |= immediate << 32;
 296                                 break;
 297                             case 4:
 298                                 immediate |= immediate << 16;
 299                                 immediate |= immediate << 32;
 300                                 break;
 301                             case 5:
 302                                 immediate |= immediate << 32;
 303                                 break;
 304                         }
 305                         // 5 - logE can underflow to -1, but we shift this bogus result
 306                         // out of the masked area.
 307                         int sizeEncoding = (1 << (5 - logE)) - 1;
 308                         int s = ((sizeEncoding << (logE + 1)) & 0x3f) | (nrOnes - 1);
 309                         table[nrImms++] = new Immediate(immediate, /* is64bit */e == 64, s, r);
 310                     }
 311                 }
 312             }
 313             Arrays.sort(table);
 314             assert nrImms == nrImmediates : nrImms + " instead of " + nrImmediates + " in table.";
 315             assert checkDuplicates(table) : "Duplicate values in table.";
 316             return table;
 317         }
 318 
 319         private static boolean checkDuplicates(Immediate[] table) {
 320             for (int i = 0; i < table.length - 1; i++) {
 321                 if (table[i].imm >= table[i + 1].imm) {
 322                     return false;
 323                 }
 324             }
 325             return true;
 326         }
 327     }
 328 
    // Shift amounts applied to a register's encoding by the rd/rs1/rs2/rs3/rt/rt2/rn helpers
    // below. Several fields share a position: Rd and Rt use 0, Rs1 and Rn use 5, Rs3 and Rt2
    // use 10.
    private static final int RdOffset = 0;
    private static final int Rs1Offset = 5;
    private static final int Rs2Offset = 16;
    private static final int Rs3Offset = 10;
    private static final int RtOffset = 0;
    private static final int RnOffset = 5;
    private static final int Rt2Offset = 10;
 336 
 337     /* Helper functions */
 338     private static int rd(Register reg) {
 339         return reg.encoding << RdOffset;
 340     }
 341 
 342     private static int rs1(Register reg) {
 343         return reg.encoding << Rs1Offset;
 344     }
 345 
 346     private static int rs2(Register reg) {
 347         return reg.encoding << Rs2Offset;
 348     }
 349 
 350     private static int rs3(Register reg) {
 351         return reg.encoding << Rs3Offset;
 352     }
 353 
 354     private static int rt(Register reg) {
 355         return reg.encoding << RtOffset;
 356     }
 357 
 358     private static int rt2(Register reg) {
 359         return reg.encoding << Rt2Offset;
 360     }
 361 
 362     private static int rn(Register reg) {
 363         return reg.encoding << RnOffset;
 364     }
 365 
 366     private static int maskField(int sizeInBits, int n) {
 367         assert NumUtil.isSignedNbit(sizeInBits, n);
 368         return n & NumUtil.getNbitNumberInt(sizeInBits);
 369     }
 370 
 371     /**
 372      * Enumeration of all different instruction kinds: General32/64 are the general instructions
 373      * (integer, branch, etc.), for 32-, respectively 64-bit operands. FP32/64 is the encoding for
 374      * the 32/64bit float operations
 375      */
 376     protected enum InstructionType {
 377         General32(0b00 << 30, 32, true),
 378         General64(0b10 << 30, 64, true),
 379         FP32(0x00000000, 32, false),
 380         FP64(0x00400000, 64, false);
 381 
 382         public final int encoding;
 383         public final int width;
 384         public final boolean isGeneral;
 385 
 386         InstructionType(int encoding, int width, boolean isGeneral) {
 387             this.encoding = encoding;
 388             this.width = width;
 389             this.isGeneral = isGeneral;
 390         }
 391 
 392         public static InstructionType generalFromSize(int size) {
 393             assert size == 32 || size == 64;
 394             return size == 32 ? General32 : General64;
 395         }
 396 
 397         public static InstructionType floatFromSize(int size) {
 398             assert size == 32 || size == 64;
 399             return size == 32 ? FP32 : FP64;
 400         }
 401 
 402     }
 403 
    // Shift amounts and opcode bit patterns used when assembling instructions.
    // NOTE(review): the methods that consume these constants are outside this part of the file;
    // going by the names, "...Offset" is the bit position of a field and "...Op" is the base
    // opcode of an instruction class - confirm against the users before relying on that.

    // Same names and values as the constants declared in LogicalImmediateTable.
    private static final int ImmediateOffset = 10;
    private static final int ImmediateRotateOffset = 16;
    private static final int ImmediateSizeOffset = 22;
    private static final int ExtendTypeOffset = 13;

    private static final int AddSubImmOp = 0x11000000;
    private static final int AddSubShift12 = 0b01 << 22;
    // OR-ed into ADD and SUB to form Instruction.ADDS and Instruction.SUBS.
    private static final int AddSubSetFlag = 0x20000000;

    private static final int LogicalImmOp = 0x12000000;

    private static final int MoveWideImmOp = 0x12800000;
    private static final int MoveWideImmOffset = 5;
    private static final int MoveWideShiftOffset = 21;

    private static final int BitfieldImmOp = 0x13000000;

    private static final int AddSubShiftedOp = 0x0B000000;
    private static final int ShiftTypeOffset = 22;

    private static final int AddSubExtendedOp = 0x0B200000;

    private static final int MulOp = 0x1B000000;
    private static final int DataProcessing1SourceOp = 0x5AC00000;
    private static final int DataProcessing2SourceOp = 0x1AC00000;

    private static final int Fp1SourceOp = 0x1E204000;
    private static final int Fp2SourceOp = 0x1E200800;
    private static final int Fp3SourceOp = 0x1F000000;

    private static final int FpConvertOp = 0x1E200000;
    private static final int FpImmOp = 0x1E201000;
    private static final int FpImmOffset = 13;

    private static final int FpCmpOp = 0x1E202000;

    private static final int PcRelImmHiOffset = 5;
    private static final int PcRelImmLoOffset = 29;

    private static final int PcRelImmOp = 0x10000000;

    private static final int UnconditionalBranchImmOp = 0x14000000;
    private static final int UnconditionalBranchRegOp = 0xD6000000;
    private static final int CompareBranchOp = 0x34000000;

    private static final int ConditionalBranchImmOffset = 5;

    private static final int ConditionalSelectOp = 0x1A800000;
    private static final int ConditionalConditionOffset = 12;

    private static final int LoadStoreScaledOp = 0b111_0_01_00 << 22;
    private static final int LoadStoreUnscaledOp = 0b111_0_00_00 << 22;

    private static final int LoadStoreRegisterOp = 0b111_0_00_00_1 << 21 | 0b10 << 10;

    private static final int LoadLiteralOp = 0x18000000;

    private static final int LoadStorePostIndexedOp = 0b111_0_00_00_0 << 21 | 0b01 << 10;
    private static final int LoadStorePreIndexedOp = 0b111_0_00_00_0 << 21 | 0b11 << 10;

    private static final int LoadStoreUnscaledImmOffset = 12;
    private static final int LoadStoreScaledImmOffset = 10;
    private static final int LoadStoreScaledRegOffset = 12;
    private static final int LoadStoreIndexedImmOffset = 12;
    private static final int LoadStoreTransferSizeOffset = 30;
    private static final int LoadStoreFpFlagOffset = 26;
    private static final int LoadLiteralImmeOffset = 5;

    private static final int LoadStorePairOp = 0b101_0 << 26;
    @SuppressWarnings("unused") private static final int LoadStorePairPostIndexOp = 0b101_0_001 << 23;
    @SuppressWarnings("unused") private static final int LoadStorePairPreIndexOp = 0b101_0_011 << 23;
    private static final int LoadStorePairImm7Offset = 15;

    private static final int LogicalShiftOp = 0x0A000000;

    private static final int ExceptionOp = 0xD4000000;
    private static final int SystemImmediateOffset = 5;

    @SuppressWarnings("unused") private static final int SimdImmediateOffset = 16;

    private static final int BarrierOp = 0xD503301F;
    private static final int BarrierKindOffset = 8;

    private static final int CASAcquireOffset = 22;
    private static final int CASReleaseOffset = 15;

    private static final int LDADDAcquireOffset = 23;
    private static final int LDADDReleaseOffset = 22;
 492 
    /**
     * Encoding for all instructions.
     * <p>
     * Each constant stores a single {@code int} in {@link #encoding}. Many constants share the
     * same numeric value (for example {@code B}, {@code LDR}, {@code STR}, {@code ADD} and
     * {@code AND} are all {@code 0}), so the value on its own does not identify an instruction;
     * presumably the emitting methods combine it with one of the {@code ...Op} constants of the
     * enclosing class (those methods are not visible here - verify against them).
     * <p>
     * Declaration order matters: constants defined in terms of another constant's encoding
     * ({@code ADDS}, {@code SUBS}, {@code BIC}, {@code ORN}, {@code EON}, {@code BICS}) must come
     * after the constant they reference.
     */
    public enum Instruction {
        BCOND(0x54000000),
        CBNZ(0x01000000),
        CBZ(0x00000000),
        TBZ(0x36000000),
        TBNZ(0x37000000),

        B(0x00000000),
        BL(0x80000000),
        BR(0x001F0000),
        BLR(0x003F0000),
        RET(0x005F0000),

        LDR(0x00000000),
        LDRS(0x00800000),
        LDXR(0x081f7c00),
        LDAR(0x8dffc00),
        LDAXR(0x85ffc00),

        STR(0x00000000),
        STXR(0x08007c00),
        STLR(0x089ffc00),
        STLXR(0x0800fc00),

        LDP(0b1 << 22),
        STP(0b0 << 22),

        CAS(0x08A07C00),
        LDADD(0x38200000),
        SWP(0x38208000),

        ADR(0x00000000),
        ADRP(0x80000000),

        // ADDS and SUBS are ADD and SUB with the AddSubSetFlag bit added.
        ADD(0x00000000),
        ADDS(ADD.encoding | AddSubSetFlag),
        SUB(0x40000000),
        SUBS(SUB.encoding | AddSubSetFlag),

        // BIC, ORN, EON and BICS are AND, ORR, EOR and ANDS with the NOT bit added.
        NOT(0x00200000),
        AND(0x00000000),
        BIC(AND.encoding | NOT.encoding),
        ORR(0x20000000),
        ORN(ORR.encoding | NOT.encoding),
        EOR(0x40000000),
        EON(EOR.encoding | NOT.encoding),
        ANDS(0x60000000),
        BICS(ANDS.encoding | NOT.encoding),

        ASRV(0x00002800),
        RORV(0x00002C00),
        LSRV(0x00002400),
        LSLV(0x00002000),

        CLS(0x00001400),
        CLZ(0x00001000),
        RBIT(0x00000000),
        REVX(0x00000C00),
        REVW(0x00000800),

        MOVN(0x00000000),
        MOVZ(0x40000000),
        MOVK(0x60000000),

        CSEL(0x00000000),
        CSNEG(0x40000400),
        CSINC(0x00000400),

        BFM(0x20000000),
        SBFM(0x00000000),
        UBFM(0x40000000),
        EXTR(0x13800000),

        MADD(0x00000000),
        MSUB(0x00008000),
        SDIV(0x00000C00),
        UDIV(0x00000800),

        FMOV(0x00000000),
        FMOVCPU2FPU(0x00070000),
        FMOVFPU2CPU(0x00060000),

        FCVTDS(0x00028000),
        FCVTSD(0x00020000),

        FCVTZS(0x00180000),
        SCVTF(0x00020000),

        FABS(0x00008000),
        FSQRT(0x00018000),
        FNEG(0x00010000),

        FRINTM(0x00050000),
        FRINTN(0x00040000),
        FRINTP(0x00048000),
        FRINTZ(0x00058000),

        FADD(0x00002000),
        FSUB(0x00003000),
        FMUL(0x00000000),
        FDIV(0x00001000),
        FMAX(0x00004000),
        FMIN(0x00005000),

        FMADD(0x00000000),
        FMSUB(0x00008000),

        FCMP(0x00000000),
        FCMPZERO(0x00000008),
        FCCMP(0x1E200400),
        FCSEL(0x1E200C00),

        INS(0x4e081c00),
        UMOV(0x4e083c00),

        CNT(0xe205800),
        USRA(0x6f001400),

        HLT(0x00400000),
        BRK(0x00200000),

        CLREX(0xd5033f5f),
        HINT(0xD503201F),
        DMB(0x000000A0),

        BLR_NATIVE(0xc0000000);

        /** The bits this instruction contributes, as passed to the constructor. */
        public final int encoding;

        Instruction(int encoding) {
            this.encoding = encoding;
        }

    }
 630 
 631     public enum ShiftType {
 632         LSL(0),
 633         LSR(1),
 634         ASR(2),
 635         ROR(3);
 636 
 637         public final int encoding;
 638 
 639         ShiftType(int encoding) {
 640             this.encoding = encoding;
 641         }
 642     }
 643 
 644     public enum ExtendType {
 645         UXTB(0),
 646         UXTH(1),
 647         UXTW(2),
 648         UXTX(3),
 649         SXTB(4),
 650         SXTH(5),
 651         SXTW(6),
 652         SXTX(7);
 653 
 654         public final int encoding;
 655 
 656         ExtendType(int encoding) {
 657             this.encoding = encoding;
 658         }
 659     }
 660 
 661     /**
 662      * Condition Flags for branches. See 4.3
 663      */
 664     public enum ConditionFlag {
 665         // Integer | Floating-point meanings
 666         /** Equal | Equal. */
 667         EQ(0x0),
 668 
 669         /** Not Equal | Not equal or unordered. */
 670         NE(0x1),
 671 
 672         /** Unsigned Higher or Same | Greater than, equal or unordered. */
 673         HS(0x2),
 674 
 675         /** Unsigned lower | less than. */
 676         LO(0x3),
 677 
 678         /** Minus (negative) | less than. */
 679         MI(0x4),
 680 
 681         /** Plus (positive or zero) | greater than, equal or unordered. */
 682         PL(0x5),
 683 
 684         /** Overflow set | unordered. */
 685         VS(0x6),
 686 
 687         /** Overflow clear | ordered. */
 688         VC(0x7),
 689 
 690         /** Unsigned higher | greater than or unordered. */
 691         HI(0x8),
 692 
 693         /** Unsigned lower or same | less than or equal. */
 694         LS(0x9),
 695 
 696         /** Signed greater than or equal | greater than or equal. */
 697         GE(0xA),
 698 
 699         /** Signed less than | less than or unordered. */
 700         LT(0xB),
 701 
 702         /** Signed greater than | greater than. */
 703         GT(0xC),
 704 
 705         /** Signed less than or equal | less than, equal or unordered. */
 706         LE(0xD),
 707 
 708         /** Always | always. */
 709         AL(0xE),
 710 
 711         /** Always | always (identical to AL, just to have valid 0b1111 encoding). */
 712         NV(0xF);
 713 
 714         public final int encoding;
 715 
 716         ConditionFlag(int encoding) {
 717             this.encoding = encoding;
 718         }
 719 
 720         /**
 721          * @return ConditionFlag specified by decoding.
 722          */
 723         public static ConditionFlag fromEncoding(int encoding) {
 724             return values()[encoding];
 725         }
 726 
 727         public ConditionFlag negate() {
 728             switch (this) {
 729                 case EQ:
 730                     return NE;
 731                 case NE:
 732                     return EQ;
 733                 case HS:
 734                     return LO;
 735                 case LO:
 736                     return HS;
 737                 case MI:
 738                     return PL;
 739                 case PL:
 740                     return MI;
 741                 case VS:
 742                     return VC;
 743                 case VC:
 744                     return VS;
 745                 case HI:
 746                     return LS;
 747                 case LS:
 748                     return HI;
 749                 case GE:
 750                     return LT;
 751                 case LT:
 752                     return GE;
 753                 case GT:
 754                     return LE;
 755                 case LE:
 756                     return GT;
 757                 case AL:
 758                 case NV:
 759                 default:
 760                     throw GraalError.shouldNotReachHere();
 761             }
 762         }
 763     }
 764 
    /**
     * Creates an assembler for the given target; the target is handed unchanged to the superclass
     * constructor.
     *
     * @param target description of the compilation target.
     */
    public AArch64Assembler(TargetDescription target) {
        super(target);
    }
 768 
 769     public boolean supports(CPUFeature feature) {
 770         return ((AArch64) target.arch).getFeatures().contains(feature);
 771     }
 772 
 773     public boolean isFlagSet(Flag flag) {
 774         return ((AArch64) target.arch).getFlags().contains(flag);
 775     }
 776 
 777     /* Conditional Branch (5.2.1) */
 778 
 779     /**
 780      * Branch conditionally.
 781      *
 782      * @param condition may not be null.
 783      * @param imm21 Signed 21-bit offset, has to be word aligned.
 784      */
 785     protected void b(ConditionFlag condition, int imm21) {
 786         b(condition, imm21, -1);
 787     }
 788 
 789     /**
 790      * Branch conditionally. Inserts instruction into code buffer at pos.
 791      *
 792      * @param condition may not be null.
 793      * @param imm21 Signed 21-bit offset, has to be word aligned.
 794      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 795      */
 796     protected void b(ConditionFlag condition, int imm21, int pos) {
 797         if (pos == -1) {
 798             emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding);
 799         } else {
 800             emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding, pos);
 801         }
 802     }
 803 
 804     /**
 805      * Compare register and branch if non-zero.
 806      *
 807      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 808      * @param size Instruction size in bits. Should be either 32 or 64.
 809      * @param imm21 Signed 21-bit offset, has to be word aligned.
 810      */
 811     protected void cbnz(int size, Register reg, int imm21) {
 812         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, -1);
 813     }
 814 
 815     /**
 816      * Compare register and branch if non-zero.
 817      *
 818      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 819      * @param size Instruction size in bits. Should be either 32 or 64.
 820      * @param imm21 Signed 21-bit offset, has to be word aligned.
 821      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 822      */
 823     protected void cbnz(int size, Register reg, int imm21, int pos) {
 824         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, pos);
 825     }
 826 
 827     /**
 828      * Compare and branch if zero.
 829      *
 830      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 831      * @param size Instruction size in bits. Should be either 32 or 64.
 832      * @param imm21 Signed 21-bit offset, has to be word aligned.
 833      */
 834     protected void cbz(int size, Register reg, int imm21) {
 835         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, -1);
 836     }
 837 
 838     /**
 839      * Compare register and branch if zero.
 840      *
 841      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 842      * @param size Instruction size in bits. Should be either 32 or 64.
 843      * @param imm21 Signed 21-bit offset, has to be word aligned.
 844      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 845      */
 846     protected void cbz(int size, Register reg, int imm21, int pos) {
 847         conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, pos);
 848     }
 849 
 850     /**
 851      * Test a single bit and branch if the bit is nonzero.
 852      *
 853      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 854      * @param uimm6 Unsigned 6-bit bit index.
 855      * @param imm16 signed 16 bit offset
 856      */
 857     protected void tbnz(Register reg, int uimm6, int imm16) {
 858         tbnz(reg, uimm6, imm16, -1);
 859     }
 860 
 861     /**
 862      * Test a single bit and branch if the bit is zero.
 863      *
 864      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 865      * @param uimm6 Unsigned 6-bit bit index.
 866      * @param imm16 signed 16 bit offset
 867      */
 868     protected void tbz(Register reg, int uimm6, int imm16) {
 869         tbz(reg, uimm6, imm16, -1);
 870     }
 871 
 872     /**
 873      * Test a single bit and branch if the bit is nonzero.
 874      *
 875      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 876      * @param uimm6 Unsigned 6-bit bit index.
 877      * @param imm16 signed 16 bit offset
 878      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 879      */
 880     protected void tbnz(Register reg, int uimm6, int imm16, int pos) {
 881         assert reg.getRegisterCategory().equals(CPU);
 882         assert NumUtil.isUnsignedNbit(6, uimm6);
 883         assert NumUtil.isSignedNbit(18, imm16);
 884         assert (imm16 & 3) == 0;
 885         // size bit is overloaded as top bit of uimm6 bit index
 886         int size = (((uimm6 >> 5) & 1) == 0 ? 32 : 64);
 887         // remaining 5 bits are encoded lower down
 888         int uimm5 = uimm6 >> 1;
 889         int offset = (imm16 & NumUtil.getNbitNumberInt(16)) >> 2;
 890         InstructionType type = generalFromSize(size);
 891         int encoding = type.encoding | TBNZ.encoding | (uimm5 << 19) | (offset << 5) | rd(reg);
 892         if (pos == -1) {
 893             emitInt(encoding);
 894         } else {
 895             emitInt(encoding, pos);
 896         }
 897     }
 898 
 899     /**
 900      * Test a single bit and branch if the bit is zero.
 901      *
 902      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 903      * @param uimm6 Unsigned 6-bit bit index.
 904      * @param imm16 signed 16 bit offset
 905      * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
 906      */
 907     protected void tbz(Register reg, int uimm6, int imm16, int pos) {
 908         assert reg.getRegisterCategory().equals(CPU);
 909         assert NumUtil.isUnsignedNbit(6, uimm6);
 910         assert NumUtil.isSignedNbit(18, imm16);
 911         assert (imm16 & 3) == 0;
 912         // size bit is overloaded as top bit of uimm6 bit index
 913         int size = (((uimm6 >> 5) & 1) == 0 ? 32 : 64);
 914         // remaining 5 bits are encoded lower down
 915         int uimm5 = uimm6 >> 1;
 916         int offset = (imm16 & NumUtil.getNbitNumberInt(16)) >> 2;
 917         InstructionType type = generalFromSize(size);
 918         int encoding = type.encoding | TBZ.encoding | (uimm5 << 19) | (offset << 5) | rd(reg);
 919         if (pos == -1) {
 920             emitInt(encoding);
 921         } else {
 922             emitInt(encoding, pos);
 923         }
 924     }
 925 
 926     private void conditionalBranchInstruction(Register reg, int imm21, InstructionType type, Instruction instr, int pos) {
 927         assert reg.getRegisterCategory().equals(CPU);
 928         int instrEncoding = instr.encoding | CompareBranchOp;
 929         if (pos == -1) {
 930             emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg));
 931         } else {
 932             emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg), pos);
 933         }
 934     }
 935 
 936     private static int getConditionalBranchImm(int imm21) {
 937         assert NumUtil.isSignedNbit(21, imm21) && (imm21 & 0x3) == 0 : "Immediate has to be 21bit signed number and word aligned";
 938         int imm = (imm21 & NumUtil.getNbitNumberInt(21)) >> 2;
 939         return imm << ConditionalBranchImmOffset;
 940     }
 941 
 942     /* Unconditional Branch (immediate) (5.2.2) */
 943 
 944     /**
 945      * @param imm28 Signed 28-bit offset, has to be word aligned.
 946      */
 947     protected void b(int imm28) {
 948         unconditionalBranchImmInstruction(imm28, Instruction.B, -1);
 949     }
 950 
 951     /**
 952      *
 953      * @param imm28 Signed 28-bit offset, has to be word aligned.
 954      * @param pos Position where instruction is inserted into code buffer.
 955      */
 956     protected void b(int imm28, int pos) {
 957         unconditionalBranchImmInstruction(imm28, Instruction.B, pos);
 958     }
 959 
 960     /**
 961      * Branch and link return address to register X30.
 962      *
 963      * @param imm28 Signed 28-bit offset, has to be word aligned.
 964      */
 965     public void bl(int imm28) {
 966         unconditionalBranchImmInstruction(imm28, Instruction.BL, -1);
 967     }
 968 
 969     private void unconditionalBranchImmInstruction(int imm28, Instruction instr, int pos) {
 970         assert NumUtil.isSignedNbit(28, imm28) && (imm28 & 0x3) == 0 : "Immediate has to be 28bit signed number and word aligned";
 971         int imm = (imm28 & NumUtil.getNbitNumberInt(28)) >> 2;
 972         int instrEncoding = instr.encoding | UnconditionalBranchImmOp;
 973         if (pos == -1) {
 974             emitInt(instrEncoding | imm);
 975         } else {
 976             emitInt(instrEncoding | imm, pos);
 977         }
 978     }
 979 
 980     /* Unconditional Branch (register) (5.2.3) */
 981 
 982     /**
 983      * Branches to address in register and writes return address into register X30.
 984      *
 985      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 986      */
 987     public void blr(Register reg) {
 988         unconditionalBranchRegInstruction(BLR, reg);
 989     }
 990 
 991     /**
 992      * Branches to address in register.
 993      *
 994      * @param reg general purpose register. May not be null, zero-register or stackpointer.
 995      */
 996     protected void br(Register reg) {
 997         unconditionalBranchRegInstruction(BR, reg);
 998     }
 999 
1000     /**
1001      * Return to address in register.
1002      *
1003      * @param reg general purpose register. May not be null, zero-register or stackpointer.
1004      */
1005     public void ret(Register reg) {
1006         unconditionalBranchRegInstruction(RET, reg);
1007     }
1008 
1009     private void unconditionalBranchRegInstruction(Instruction instr, Register reg) {
1010         assert reg.getRegisterCategory().equals(CPU);
1011         assert !reg.equals(zr);
1012         assert !reg.equals(sp);
1013         emitInt(instr.encoding | UnconditionalBranchRegOp | rs1(reg));
1014     }
1015 
1016     /* Load-Store Single Register (5.3.1) */
1017 
1018     /**
1019      * Loads a srcSize value from address into rt zero-extending it.
1020      *
1021      * @param srcSize size of memory read in bits. Must be 8, 16, 32 or 64.
1022      * @param rt general purpose register. May not be null or stackpointer.
1023      * @param address all addressing modes allowed. May not be null.
1024      */
1025     public void ldr(int srcSize, Register rt, AArch64Address address) {
1026         assert rt.getRegisterCategory().equals(CPU);
1027         assert srcSize == 8 || srcSize == 16 || srcSize == 32 || srcSize == 64;
1028         int transferSize = NumUtil.log2Ceil(srcSize / 8);
1029         loadStoreInstruction(LDR, rt, address, General32, transferSize);
1030     }
1031 
1032     /**
1033      * Loads a srcSize value from address into rt sign-extending it.
1034      *
1035      * @param targetSize size of target register in bits. Must be 32 or 64.
1036      * @param srcSize size of memory read in bits. Must be 8, 16 or 32, but may not be equivalent to
1037      *            targetSize.
1038      * @param rt general purpose register. May not be null or stackpointer.
1039      * @param address all addressing modes allowed. May not be null.
1040      */
1041     protected void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
1042         assert rt.getRegisterCategory().equals(CPU);
1043         assert (srcSize == 8 || srcSize == 16 || srcSize == 32) && srcSize != targetSize;
1044         int transferSize = NumUtil.log2Ceil(srcSize / 8);
1045         loadStoreInstruction(LDRS, rt, address, generalFromSize(targetSize), transferSize);
1046     }
1047 
1048     public enum PrefetchMode {
1049         PLDL1KEEP(0b00000),
1050         PLDL1STRM(0b00001),
1051         PLDL2KEEP(0b00010),
1052         PLDL2STRM(0b00011),
1053         PLDL3KEEP(0b00100),
1054         PLDL3STRM(0b00101),
1055 
1056         PLIL1KEEP(0b01000),
1057         PLIL1STRM(0b01001),
1058         PLIL2KEEP(0b01010),
1059         PLIL2STRM(0b01011),
1060         PLIL3KEEP(0b01100),
1061         PLIL3STRM(0b01101),
1062 
1063         PSTL1KEEP(0b10000),
1064         PSTL1STRM(0b10001),
1065         PSTL2KEEP(0b10010),
1066         PSTL2STRM(0b10011),
1067         PSTL3KEEP(0b10100),
1068         PSTL3STRM(0b10101);
1069 
1070         private final int encoding;
1071 
1072         PrefetchMode(int encoding) {
1073             this.encoding = encoding;
1074         }
1075 
1076         private static PrefetchMode[] modes = {
1077                         PLDL1KEEP,
1078                         PLDL1STRM,
1079                         PLDL2KEEP,
1080                         PLDL2STRM,
1081                         PLDL3KEEP,
1082                         PLDL3STRM,
1083 
1084                         null,
1085                         null,
1086 
1087                         PLIL1KEEP,
1088                         PLIL1STRM,
1089                         PLIL2KEEP,
1090                         PLIL2STRM,
1091                         PLIL3KEEP,
1092                         PLIL3STRM,
1093 
1094                         null,
1095                         null,
1096 
1097                         PSTL1KEEP,
1098                         PSTL1STRM,
1099                         PSTL2KEEP,
1100                         PSTL2STRM,
1101                         PSTL3KEEP,
1102                         PSTL3STRM
1103         };
1104 
1105         public static PrefetchMode lookup(int enc) {
1106             assert enc >= 00 && enc < modes.length;
1107             return modes[enc];
1108         }
1109 
1110         public Register toRegister() {
1111             return cpuRegisters.get(encoding);
1112         }
1113     }
1114 
1115     /*
1116      * implements a prefetch at a 64-bit aligned address using a scaled 12 bit or unscaled 9 bit
1117      * displacement addressing mode
1118      *
1119      * @param rt general purpose register. May not be null, zr or stackpointer.
1120      *
1121      * @param address only displacement addressing modes allowed. May not be null.
1122      */
1123     public void prfm(AArch64Address address, PrefetchMode mode) {
1124         assert (address.getAddressingMode() == AddressingMode.IMMEDIATE_SCALED ||
1125                         address.getAddressingMode() == AddressingMode.IMMEDIATE_UNSCALED ||
1126                         address.getAddressingMode() == AddressingMode.REGISTER_OFFSET);
1127         assert mode != null;
1128         final int srcSize = 64;
1129         final int transferSize = NumUtil.log2Ceil(srcSize / 8);
1130         final Register rt = mode.toRegister();
1131         // this looks weird but that's because loadStoreInstruction is weird
1132         // instruction select fields are size [31:30], v [26] and opc [25:24]
1133         // prfm requires size == 0b11, v == 0b0 and opc == 0b11
1134         // passing LDRS ensures opc[1] == 0b1
1135         // (n.b. passing LDR/STR makes no difference to opc[1:0]!!)
1136         // passing General64 ensures opc[0] == 0b1 and v = 0b0
1137         // (n.b. passing General32 ensures opc[0] == 0b0 and v = 0b0)
1138         // srcSize 64 ensures size == 0b11
1139         loadStoreInstruction(LDRS, rt, address, General64, transferSize);
1140     }
1141 
1142     /**
1143      * Stores register rt into memory pointed by address.
1144      *
1145      * @param destSize number of bits written to memory. Must be 8, 16, 32 or 64.
1146      * @param rt general purpose register. May not be null or stackpointer.
1147      * @param address all addressing modes allowed. May not be null.
1148      */
1149     public void str(int destSize, Register rt, AArch64Address address) {
1150         assert rt.getRegisterCategory().equals(CPU);
1151         assert destSize == 8 || destSize == 16 || destSize == 32 || destSize == 64;
1152         int transferSize = NumUtil.log2Ceil(destSize / 8);
1153         loadStoreInstruction(STR, rt, address, General64, transferSize);
1154     }
1155 
1156     private void loadStoreInstruction(Instruction instr, Register reg, AArch64Address address, InstructionType type, int log2TransferSize) {
1157         assert log2TransferSize >= 0 && log2TransferSize < 4;
1158         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1159         int is32Bit = type.width == 32 ? 1 << ImmediateSizeOffset : 0;
1160         int isFloat = !type.isGeneral ? 1 << LoadStoreFpFlagOffset : 0;
1161         int memop = instr.encoding | transferSizeEncoding | is32Bit | isFloat | rt(reg);
1162         switch (address.getAddressingMode()) {
1163             case IMMEDIATE_SCALED:
1164                 emitInt(memop | LoadStoreScaledOp | address.getImmediate() << LoadStoreScaledImmOffset | rs1(address.getBase()));
1165                 break;
1166             case IMMEDIATE_UNSCALED:
1167                 emitInt(memop | LoadStoreUnscaledOp | address.getImmediate() << LoadStoreUnscaledImmOffset | rs1(address.getBase()));
1168                 break;
1169             case BASE_REGISTER_ONLY:
1170                 emitInt(memop | LoadStoreScaledOp | rs1(address.getBase()));
1171                 break;
1172             case EXTENDED_REGISTER_OFFSET:
1173             case REGISTER_OFFSET:
1174                 ExtendType extendType = address.getAddressingMode() == AddressingMode.EXTENDED_REGISTER_OFFSET ? address.getExtendType() : ExtendType.UXTX;
1175                 boolean shouldScale = address.isScaled() && log2TransferSize != 0;
1176                 emitInt(memop | LoadStoreRegisterOp | rs2(address.getOffset()) | extendType.encoding << ExtendTypeOffset | (shouldScale ? 1 : 0) << LoadStoreScaledRegOffset | rs1(address.getBase()));
1177                 break;
1178             case PC_LITERAL:
1179                 assert log2TransferSize >= 2 : "PC literal loads only works for load/stores of 32-bit and larger";
1180                 transferSizeEncoding = (log2TransferSize - 2) << LoadStoreTransferSizeOffset;
1181                 emitInt(transferSizeEncoding | isFloat | LoadLiteralOp | rd(reg) | address.getImmediate() << LoadLiteralImmeOffset);
1182                 break;
1183             case IMMEDIATE_POST_INDEXED:
1184                 emitInt(memop | LoadStorePostIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset);
1185                 break;
1186             case IMMEDIATE_PRE_INDEXED:
1187                 emitInt(memop | LoadStorePreIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset);
1188                 break;
1189             default:
1190                 throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode());
1191         }
1192     }
1193 
1194     /**
1195      * Load Pair of Registers calculates an address from a base register value and an immediate
1196      * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from
1197      * two registers.
1198      */
1199     public void ldp(int size, Register rt, Register rt2, AArch64Address address) {
1200         assert size == 32 || size == 64;
1201         loadStorePairInstruction(LDP, rt, rt2, address, generalFromSize(size));
1202     }
1203 
1204     /**
1205      * Store Pair of Registers calculates an address from a base register value and an immediate
1206      * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from
1207      * two registers.
1208      */
1209     public void stp(int size, Register rt, Register rt2, AArch64Address address) {
1210         assert size == 32 || size == 64;
1211         loadStorePairInstruction(STP, rt, rt2, address, generalFromSize(size));
1212     }
1213 
1214     private void loadStorePairInstruction(Instruction instr, Register rt, Register rt2, AArch64Address address, InstructionType type) {
1215         int scaledOffset = maskField(7, address.getImmediateRaw());  // LDP/STP use a 7-bit scaled
1216                                                                      // offset
1217         int memop = type.encoding | instr.encoding | scaledOffset << LoadStorePairImm7Offset | rt2(rt2) | rn(address.getBase()) | rt(rt);
1218         switch (address.getAddressingMode()) {
1219             case IMMEDIATE_SCALED:
1220                 emitInt(memop | LoadStorePairOp | (0b010 << 23));
1221                 break;
1222             case IMMEDIATE_POST_INDEXED:
1223                 emitInt(memop | LoadStorePairOp | (0b001 << 23));
1224                 break;
1225             case IMMEDIATE_PRE_INDEXED:
1226                 emitInt(memop | LoadStorePairOp | (0b011 << 23));
1227                 break;
1228             default:
1229                 throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode());
1230         }
1231     }
1232 
1233     /* Load-Store Exclusive (5.3.6) */
1234 
1235     /**
1236      * Load address exclusive. Natural alignment of address is required.
1237      *
1238      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1239      * @param rt general purpose register. May not be null or stackpointer.
1240      * @param rn general purpose register.
1241      */
1242     protected void ldxr(int size, Register rt, Register rn) {
1243         assert size == 8 || size == 16 || size == 32 || size == 64;
1244         int transferSize = NumUtil.log2Ceil(size / 8);
1245         exclusiveLoadInstruction(LDXR, rt, rn, transferSize);
1246     }
1247 
1248     /**
1249      * Store address exclusive. Natural alignment of address is required. rs and rt may not point to
1250      * the same register.
1251      *
1252      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1253      * @param rs general purpose register. Set to exclusive access status. 0 means success,
1254      *            everything else failure. May not be null, or stackpointer.
1255      * @param rt general purpose register. May not be null or stackpointer.
1256      * @param rn general purpose register.
1257      */
1258     protected void stxr(int size, Register rs, Register rt, Register rn) {
1259         assert size == 8 || size == 16 || size == 32 || size == 64;
1260         int transferSize = NumUtil.log2Ceil(size / 8);
1261         exclusiveStoreInstruction(STXR, rs, rt, rn, transferSize);
1262     }
1263 
1264     /* Load-Acquire/Store-Release (5.3.7) */
1265 
1266     /* non exclusive access */
1267     /**
1268      * Load acquire. Natural alignment of address is required.
1269      *
1270      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1271      * @param rt general purpose register. May not be null or stackpointer.
1272      * @param rn general purpose register.
1273      */
1274     protected void ldar(int size, Register rt, Register rn) {
1275         assert size == 8 || size == 16 || size == 32 || size == 64;
1276         int transferSize = NumUtil.log2Ceil(size / 8);
1277         exclusiveLoadInstruction(LDAR, rt, rn, transferSize);
1278     }
1279 
1280     /**
1281      * Store-release. Natural alignment of address is required.
1282      *
1283      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1284      * @param rt general purpose register. May not be null or stackpointer.
1285      * @param rn general purpose register.
1286      */
1287     protected void stlr(int size, Register rt, Register rn) {
1288         assert size == 8 || size == 16 || size == 32 || size == 64;
1289         int transferSize = NumUtil.log2Ceil(size / 8);
1290         // Hack: Passing the zero-register means it is ignored when building the encoding.
1291         exclusiveStoreInstruction(STLR, r0, rt, rn, transferSize);
1292     }
1293 
1294     /* exclusive access */
1295     /**
1296      * Load acquire exclusive. Natural alignment of address is required.
1297      *
1298      * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1299      * @param rt general purpose register. May not be null or stackpointer.
1300      * @param rn general purpose register.
1301      */
1302     public void ldaxr(int size, Register rt, Register rn) {
1303         assert size == 8 || size == 16 || size == 32 || size == 64;
1304         int transferSize = NumUtil.log2Ceil(size / 8);
1305         exclusiveLoadInstruction(LDAXR, rt, rn, transferSize);
1306     }
1307 
1308     /**
1309      * Store-release exclusive. Natural alignment of address is required. rs and rt may not point to
1310      * the same register.
1311      *
1312      * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1313      * @param rs general purpose register. Set to exclusive access status. 0 means success,
1314      *            everything else failure. May not be null, or stackpointer.
1315      * @param rt general purpose register. May not be null or stackpointer.
1316      * @param rn general purpose register.
1317      */
1318     public void stlxr(int size, Register rs, Register rt, Register rn) {
1319         assert size == 8 || size == 16 || size == 32 || size == 64;
1320         int transferSize = NumUtil.log2Ceil(size / 8);
1321         exclusiveStoreInstruction(STLXR, rs, rt, rn, transferSize);
1322     }
1323 
1324     private void exclusiveLoadInstruction(Instruction instr, Register reg, Register rn, int log2TransferSize) {
1325         assert log2TransferSize >= 0 && log2TransferSize < 4;
1326         assert reg.getRegisterCategory().equals(CPU);
1327         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1328         emitInt(transferSizeEncoding | instr.encoding | 1 << ImmediateSizeOffset | rn(rn) | rt(reg));
1329     }
1330 
1331     /**
1332      * Stores data from rt into address and sets rs to the returned exclusive access status.
1333      *
1334      * @param rs general purpose register into which the exclusive access status is written. May not
1335      *            be null.
1336      * @param rt general purpose register containing data to be written to memory at address. May
1337      *            not be null
1338      * @param rn general purpose register containing the address specifying where rt is written to.
1339      * @param log2TransferSize log2Ceil of memory transfer size.
1340      */
1341     private void exclusiveStoreInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize) {
1342         assert log2TransferSize >= 0 && log2TransferSize < 4;
1343         assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt);
1344         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1345         emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt));
1346     }
1347 
1348     /**
1349      * Compare And Swap word or doubleword in memory. This reads a value from an address rn,
1350      * compares it against a given value rs, and, if equal, stores the value rt to memory. The value
1351      * read from address rn is stored in register rs.
1352      *
1353      * @param size size of bits read from memory. Must be 32 or 64.
1354      * @param rs general purpose register to be compared and loaded. May not be null.
1355      * @param rt general purpose register to be conditionally stored. May not be null.
1356      * @param rn general purpose register containing the address from which to read.
1357      * @param acquire boolean value signifying if the load should use acquire semantics.
1358      * @param release boolean value signifying if the store should use release semantics.
1359      */
1360     public void cas(int size, Register rs, Register rt, Register rn, boolean acquire, boolean release) {
1361         assert size == 32 || size == 64;
1362         int transferSize = NumUtil.log2Ceil(size / 8);
1363         compareAndSwapInstruction(CAS, rs, rt, rn, transferSize, acquire, release);
1364     }
1365 
1366     private void compareAndSwapInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize, boolean acquire, boolean release) {
1367         assert log2TransferSize >= 0 && log2TransferSize < 4;
1368         assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt);
1369         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1370         emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt) | (acquire ? 1 : 0) << CASAcquireOffset | (release ? 1 : 0) << CASReleaseOffset);
1371     }
1372 
1373     /**
1374      * Atomic add. This reads a value from an address rn, stores the value in rt, and adds the value
1375      * in rs to it, and stores the result back at address rn. The initial value read from memory is
1376      * stored in rt.
1377      *
1378      * @param size size of operand to read from memory. Must be 8, 16, 32, or 64.
1379      * @param rs general purpose register to be added to contents. May not be null.
1380      * @param rt general purpose register to be loaded. May not be null.
1381      * @param rn general purpose register or stack pointer holding an address from which to load.
1382      * @param acquire boolean value signifying if the load should use acquire semantics.
1383      * @param release boolean value signifying if the store should use release semantics.
1384      */
1385     public void ldadd(int size, Register rs, Register rt, Register rn, boolean acquire, boolean release) {
1386         assert size == 8 || size == 16 || size == 32 || size == 64;
1387         int transferSize = NumUtil.log2Ceil(size / 8);
1388         loadAndAddInstruction(LDADD, rs, rt, rn, transferSize, acquire, release);
1389     }
1390 
1391     private void loadAndAddInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize, boolean acquire, boolean release) {
1392         assert log2TransferSize >= 0 && log2TransferSize < 4;
1393         assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt);
1394         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1395         emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt) | (acquire ? 1 : 0) << LDADDAcquireOffset | (release ? 1 : 0) << LDADDReleaseOffset);
1396     }
1397 
1398     /**
1399      * Atomic swap. This reads a value from an address rn, stores the value in rt, and then stores
1400      * the value in rs back at address rn.
1401      *
1402      * @param size size of operand to read from memory. Must be 8, 16, 32, or 64.
1403      * @param rs general purpose register to be stored. May not be null.
1404      * @param rt general purpose register to be loaded. May not be null.
1405      * @param rn general purpose register or stack pointer holding an address from which to load.
1406      * @param acquire boolean value signifying if the load should use acquire semantics.
1407      * @param release boolean value signifying if the store should use release semantics.
1408      */
1409     public void swp(int size, Register rs, Register rt, Register rn, boolean acquire, boolean release) {
1410         assert size == 8 || size == 16 || size == 32 || size == 64;
1411         int transferSize = NumUtil.log2Ceil(size / 8);
1412         swapInstruction(SWP, rs, rt, rn, transferSize, acquire, release);
1413     }
1414 
1415     private void swapInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize, boolean acquire, boolean release) {
1416         assert log2TransferSize >= 0 && log2TransferSize < 4;
1417         assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt);
1418         int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1419         emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt) | (acquire ? 1 : 0) << LDADDAcquireOffset | (release ? 1 : 0) << LDADDReleaseOffset);
1420     }
1421 
1422     /* PC-relative Address Calculation (5.4.4) */
1423 
1424     /**
1425      * Address of page: sign extends 21-bit offset, shifts if left by 12 and adds it to the value of
1426      * the PC with its bottom 12-bits cleared, writing the result to dst. No offset is emitted; the
1427      * instruction will be patched later.
1428      *
1429      * @param dst general purpose register. May not be null, zero-register or stackpointer.
1430      */
1431     public void adrp(Register dst) {
1432         emitInt(ADRP.encoding | PcRelImmOp | rd(dst));
1433     }
1434 
1435     /**
1436      * Adds a 21-bit signed offset to the program counter and writes the result to dst.
1437      *
1438      * @param dst general purpose register. May not be null, zero-register or stackpointer.
1439      * @param imm21 Signed 21-bit offset.
1440      */
1441     public void adr(Register dst, int imm21) {
1442         emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21));
1443     }
1444 
1445     /**
1446      * Adds a 21-bit signed offset to the program counter and writes the result to dst.
1447      *
1448      * @param dst general purpose register. May not be null, zero-register or stackpointer.
1449      * @param imm21 Signed 21-bit offset.
1450      * @param pos the position in the code that the instruction is emitted.
1451      */
1452     public void adr(Register dst, int imm21, int pos) {
1453         emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21), pos);
1454     }
1455 
1456     private static int getPcRelativeImmEncoding(int imm21) {
1457         assert NumUtil.isSignedNbit(21, imm21);
1458         int imm = imm21 & NumUtil.getNbitNumberInt(21);
1459         // higher 19 bit
1460         int immHi = (imm >> 2) << PcRelImmHiOffset;
1461         // lower 2 bit
1462         int immLo = (imm & 0x3) << PcRelImmLoOffset;
1463         return immHi | immLo;
1464     }
1465 
1466     /* Arithmetic (Immediate) (5.4.1) */
1467 
1468     /**
1469      * dst = src + aimm.
1470      *
1471      * @param size register size. Has to be 32 or 64.
1472      * @param dst general purpose register. May not be null or zero-register.
1473      * @param src general purpose register. May not be null or zero-register.
1474      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1475      *            the lower 12-bit cleared.
1476      */
1477     protected void add(int size, Register dst, Register src, int aimm) {
1478         assert !dst.equals(zr);
1479         assert !src.equals(zr);
1480         addSubImmInstruction(ADD, dst, src, aimm, generalFromSize(size));
1481     }
1482 
1483     /**
1484      * dst = src + aimm and sets condition flags.
1485      *
1486      * @param size register size. Has to be 32 or 64.
1487      * @param dst general purpose register. May not be null or stackpointer.
1488      * @param src general purpose register. May not be null or zero-register.
1489      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1490      *            the lower 12-bit cleared.
1491      */
1492     protected void adds(int size, Register dst, Register src, int aimm) {
1493         assert !dst.equals(sp);
1494         assert !src.equals(zr);
1495         addSubImmInstruction(ADDS, dst, src, aimm, generalFromSize(size));
1496     }
1497 
1498     /**
1499      * dst = src - aimm.
1500      *
1501      * @param size register size. Has to be 32 or 64.
1502      * @param dst general purpose register. May not be null or zero-register.
1503      * @param src general purpose register. May not be null or zero-register.
1504      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1505      *            the lower 12-bit cleared.
1506      */
1507     protected void sub(int size, Register dst, Register src, int aimm) {
1508         assert !dst.equals(zr);
1509         assert !src.equals(zr);
1510         addSubImmInstruction(SUB, dst, src, aimm, generalFromSize(size));
1511     }
1512 
1513     /**
1514      * dst = src - aimm and sets condition flags.
1515      *
1516      * @param size register size. Has to be 32 or 64.
1517      * @param dst general purpose register. May not be null or stackpointer.
1518      * @param src general purpose register. May not be null or zero-register.
1519      * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1520      *            the lower 12-bit cleared.
1521      */
1522     protected void subs(int size, Register dst, Register src, int aimm) {
1523         assert !dst.equals(sp);
1524         assert !src.equals(zr);
1525         addSubImmInstruction(SUBS, dst, src, aimm, generalFromSize(size));
1526     }
1527 
1528     private void addSubImmInstruction(Instruction instr, Register dst, Register src, int aimm, InstructionType type) {
1529         emitInt(type.encoding | instr.encoding | AddSubImmOp | encodeAimm(aimm) | rd(dst) | rs1(src));
1530     }
1531 
1532     /**
1533      * Encodes arithmetic immediate.
1534      *
1535      * @param imm Immediate has to be either an unsigned 12-bit value or an unsigned 24-bit value
1536      *            with the lower 12 bits zero.
1537      * @return Representation of immediate for use with arithmetic instructions.
1538      */
1539     private static int encodeAimm(int imm) {
1540         assert isAimm(imm) : "Immediate has to be legal arithmetic immediate value " + imm;
1541         if (NumUtil.isUnsignedNbit(12, imm)) {
1542             return imm << ImmediateOffset;
1543         } else {
1544             // First 12-bit are zero, so shift immediate 12-bit and set flag to indicate
1545             // shifted immediate value.
1546             return (imm >>> 12 << ImmediateOffset) | AddSubShift12;
1547         }
1548     }
1549 
1550     /**
1551      * Checks whether immediate can be encoded as an arithmetic immediate.
1552      *
1553      * @param imm Immediate has to be either an unsigned 12bit value or un unsigned 24bit value with
1554      *            the lower 12 bits 0.
1555      * @return true if valid arithmetic immediate, false otherwise.
1556      */
1557     protected static boolean isAimm(int imm) {
1558         return NumUtil.isUnsignedNbit(12, imm) || NumUtil.isUnsignedNbit(12, imm >>> 12) && (imm & 0xfff) == 0;
1559     }
1560 
1561     /* Logical (immediate) (5.4.2) */
1562 
1563     /**
1564      * dst = src & bimm.
1565      *
1566      * @param size register size. Has to be 32 or 64.
1567      * @param dst general purpose register. May not be null or zero-register.
1568      * @param src general purpose register. May not be null or stack-pointer.
1569      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1570      */
1571     public void and(int size, Register dst, Register src, long bimm) {
1572         assert !dst.equals(zr);
1573         assert !src.equals(sp);
1574         logicalImmInstruction(AND, dst, src, bimm, generalFromSize(size));
1575     }
1576 
1577     /**
1578      * dst = src & bimm and sets condition flags.
1579      *
1580      * @param size register size. Has to be 32 or 64.
1581      * @param dst general purpose register. May not be null or stack-pointer.
1582      * @param src general purpose register. May not be null or stack-pointer.
1583      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1584      */
1585     public void ands(int size, Register dst, Register src, long bimm) {
1586         assert !dst.equals(sp);
1587         assert !src.equals(sp);
1588         logicalImmInstruction(ANDS, dst, src, bimm, generalFromSize(size));
1589     }
1590 
1591     /**
1592      * dst = src ^ bimm.
1593      *
1594      * @param size register size. Has to be 32 or 64.
1595      * @param dst general purpose register. May not be null or zero-register.
1596      * @param src general purpose register. May not be null or stack-pointer.
1597      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1598      */
1599     public void eor(int size, Register dst, Register src, long bimm) {
1600         assert !dst.equals(zr);
1601         assert !src.equals(sp);
1602         logicalImmInstruction(EOR, dst, src, bimm, generalFromSize(size));
1603     }
1604 
1605     /**
1606      * dst = src | bimm.
1607      *
1608      * @param size register size. Has to be 32 or 64.
1609      * @param dst general purpose register. May not be null or zero-register.
1610      * @param src general purpose register. May not be null or stack-pointer.
1611      * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1612      */
1613     protected void orr(int size, Register dst, Register src, long bimm) {
1614         assert !dst.equals(zr);
1615         assert !src.equals(sp);
1616         logicalImmInstruction(ORR, dst, src, bimm, generalFromSize(size));
1617     }
1618 
1619     private void logicalImmInstruction(Instruction instr, Register dst, Register src, long bimm, InstructionType type) {
1620         // Mask higher bits off, since we always pass longs around even for the 32-bit instruction.
1621         long bimmValue;
1622         if (type == General32) {
1623             assert (bimm >> 32) == 0 || (bimm >> 32) == -1L : "Higher order bits for 32-bit instruction must either all be 0 or 1.";
1624             bimmValue = bimm & NumUtil.getNbitNumberLong(32);
1625         } else {
1626             bimmValue = bimm;
1627         }
1628         int immEncoding = LogicalImmediateTable.getLogicalImmEncoding(type == General64, bimmValue);
1629         emitInt(type.encoding | instr.encoding | LogicalImmOp | immEncoding | rd(dst) | rs1(src));
1630     }
1631 
1632     /* Move (wide immediate) (5.4.3) */
1633 
1634     /**
1635      * dst = uimm16 << shiftAmt.
1636      *
1637      * @param size register size. Has to be 32 or 64.
1638      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1639      * @param uimm16 16-bit unsigned immediate
1640      * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller
1641      *            than size.
1642      */
1643     protected void movz(int size, Register dst, int uimm16, int shiftAmt) {
1644         moveWideImmInstruction(MOVZ, dst, uimm16, shiftAmt, generalFromSize(size));
1645     }
1646 
1647     /**
1648      * dst = ~(uimm16 << shiftAmt).
1649      *
1650      * @param size register size. Has to be 32 or 64.
1651      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1652      * @param uimm16 16-bit unsigned immediate
1653      * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller
1654      *            than size.
1655      */
1656     protected void movn(int size, Register dst, int uimm16, int shiftAmt) {
1657         moveWideImmInstruction(MOVN, dst, uimm16, shiftAmt, generalFromSize(size));
1658     }
1659 
1660     /**
1661      * dst<pos+15:pos> = uimm16.
1662      *
1663      * @param size register size. Has to be 32 or 64.
1664      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1665      * @param uimm16 16-bit unsigned immediate
1666      * @param pos position into which uimm16 is inserted. Can be any multiple of 16 smaller than
1667      *            size.
1668      */
1669     protected void movk(int size, Register dst, int uimm16, int pos) {
1670         moveWideImmInstruction(MOVK, dst, uimm16, pos, generalFromSize(size));
1671     }
1672 
1673     private void moveWideImmInstruction(Instruction instr, Register dst, int uimm16, int shiftAmt, InstructionType type) {
1674         assert dst.getRegisterCategory().equals(CPU);
1675         assert NumUtil.isUnsignedNbit(16, uimm16) : "Immediate has to be unsigned 16bit";
1676         assert shiftAmt == 0 || shiftAmt == 16 || (type == InstructionType.General64 && (shiftAmt == 32 || shiftAmt == 48)) : "Invalid shift amount: " + shiftAmt;
1677         int shiftValue = shiftAmt >> 4;
1678         emitInt(type.encoding | instr.encoding | MoveWideImmOp | rd(dst) | uimm16 << MoveWideImmOffset | shiftValue << MoveWideShiftOffset);
1679     }
1680 
1681     /* Bitfield Operations (5.4.5) */
1682 
1683     /**
1684      * Bitfield move.
1685      *
1686      * @param size register size. Has to be 32 or 64.
1687      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1688      * @param src general purpose register. May not be null, stackpointer or zero-register.
1689      * @param r must be in the range 0 to size - 1
1690      * @param s must be in the range 0 to size - 1
1691      */
1692     public void bfm(int size, Register dst, Register src, int r, int s) {
1693         bitfieldInstruction(BFM, dst, src, r, s, generalFromSize(size));
1694     }
1695 
1696     /**
1697      * Unsigned bitfield move.
1698      *
1699      * @param size register size. Has to be 32 or 64.
1700      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1701      * @param src general purpose register. May not be null, stackpointer or zero-register.
1702      * @param r must be in the range 0 to size - 1
1703      * @param s must be in the range 0 to size - 1
1704      */
1705     public void ubfm(int size, Register dst, Register src, int r, int s) {
1706         bitfieldInstruction(UBFM, dst, src, r, s, generalFromSize(size));
1707     }
1708 
1709     /**
1710      * Signed bitfield move.
1711      *
1712      * @param size register size. Has to be 32 or 64.
1713      * @param dst general purpose register. May not be null, stackpointer or zero-register.
1714      * @param src general purpose register. May not be null, stackpointer or zero-register.
1715      * @param r must be in the range 0 to size - 1
1716      * @param s must be in the range 0 to size - 1
1717      */
1718     protected void sbfm(int size, Register dst, Register src, int r, int s) {
1719         bitfieldInstruction(SBFM, dst, src, r, s, generalFromSize(size));
1720     }
1721 
1722     private void bitfieldInstruction(Instruction instr, Register dst, Register src, int r, int s, InstructionType type) {
1723         assert !dst.equals(sp) && !dst.equals(zr);
1724         assert !src.equals(sp) && !src.equals(zr);
1725         assert s >= 0 && s < type.width && r >= 0 && r < type.width;
1726         int sf = type == General64 ? 1 << ImmediateSizeOffset : 0;
1727         emitInt(type.encoding | instr.encoding | BitfieldImmOp | sf | r << ImmediateRotateOffset | s << ImmediateOffset | rd(dst) | rs1(src));
1728     }
1729 
1730     /* Extract (Immediate) (5.4.6) */
1731 
1732     /**
1733      * Extract. dst = src1:src2<lsb+31:lsb>
1734      *
1735      * @param size register size. Has to be 32 or 64.
1736      * @param dst general purpose register. May not be null or stackpointer.
1737      * @param src1 general purpose register. May not be null or stackpointer.
1738      * @param src2 general purpose register. May not be null or stackpointer.
1739      * @param lsb must be in range 0 to size - 1.
1740      */
1741     protected void extr(int size, Register dst, Register src1, Register src2, int lsb) {
1742         assert !dst.equals(sp);
1743         assert !src1.equals(sp);
1744         assert !src2.equals(sp);
1745         InstructionType type = generalFromSize(size);
1746         assert lsb >= 0 && lsb < type.width;
1747         int sf = type == General64 ? 1 << ImmediateSizeOffset : 0;
1748         emitInt(type.encoding | EXTR.encoding | sf | lsb << ImmediateOffset | rd(dst) | rs1(src1) | rs2(src2));
1749     }
1750 
1751     /* Arithmetic (shifted register) (5.5.1) */
1752 
1753     /**
1754      * dst = src1 + shiftType(src2, imm).
1755      *
1756      * @param size register size. Has to be 32 or 64.
1757      * @param dst general purpose register. May not be null or stackpointer.
1758      * @param src1 general purpose register. May not be null or stackpointer.
1759      * @param src2 general purpose register. May not be null or stackpointer.
1760      * @param shiftType any type but ROR.
1761      * @param imm must be in range 0 to size - 1.
1762      */
1763     protected void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1764         addSubShiftedInstruction(ADD, dst, src1, src2, shiftType, imm, generalFromSize(size));
1765     }
1766 
1767     /**
1768      * dst = src1 + shiftType(src2, imm) and sets condition flags.
1769      *
1770      * @param size register size. Has to be 32 or 64.
1771      * @param dst general purpose register. May not be null or stackpointer.
1772      * @param src1 general purpose register. May not be null or stackpointer.
1773      * @param src2 general purpose register. May not be null or stackpointer.
1774      * @param shiftType any type but ROR.
1775      * @param imm must be in range 0 to size - 1.
1776      */
1777     public void adds(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1778         addSubShiftedInstruction(ADDS, dst, src1, src2, shiftType, imm, generalFromSize(size));
1779     }
1780 
1781     /**
1782      * dst = src1 - shiftType(src2, imm).
1783      *
1784      * @param size register size. Has to be 32 or 64.
1785      * @param dst general purpose register. May not be null or stackpointer.
1786      * @param src1 general purpose register. May not be null or stackpointer.
1787      * @param src2 general purpose register. May not be null or stackpointer.
1788      * @param shiftType any type but ROR.
1789      * @param imm must be in range 0 to size - 1.
1790      */
1791     protected void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1792         addSubShiftedInstruction(SUB, dst, src1, src2, shiftType, imm, generalFromSize(size));
1793     }
1794 
1795     /**
1796      * dst = src1 - shiftType(src2, imm) and sets condition flags.
1797      *
1798      * @param size register size. Has to be 32 or 64.
1799      * @param dst general purpose register. May not be null or stackpointer.
1800      * @param src1 general purpose register. May not be null or stackpointer.
1801      * @param src2 general purpose register. May not be null or stackpointer.
1802      * @param shiftType any type but ROR.
1803      * @param imm must be in range 0 to size - 1.
1804      */
1805     public void subs(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1806         addSubShiftedInstruction(SUBS, dst, src1, src2, shiftType, imm, generalFromSize(size));
1807     }
1808 
1809     private void addSubShiftedInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int imm, InstructionType type) {
1810         assert shiftType != ShiftType.ROR;
1811         assert imm >= 0 && imm < type.width;
1812         emitInt(type.encoding | instr.encoding | AddSubShiftedOp | imm << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1813     }
1814 
1815     /* Arithmetic (extended register) (5.5.2) */
1816     /**
1817      * dst = src1 + extendType(src2) << imm.
1818      *
1819      * @param size register size. Has to be 32 or 64.
1820      * @param dst general purpose register. May not be null or zero-register..
1821      * @param src1 general purpose register. May not be null or zero-register.
1822      * @param src2 general purpose register. May not be null or stackpointer.
1823      * @param extendType defines how src2 is extended to the same size as src1.
1824      * @param shiftAmt must be in range 0 to 4.
1825      */
1826     public void add(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1827         assert !dst.equals(zr);
1828         assert !src1.equals(zr);
1829         assert !src2.equals(sp);
1830         addSubExtendedInstruction(ADD, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1831     }
1832 
1833     /**
1834      * dst = src1 + extendType(src2) << imm and sets condition flags.
1835      *
1836      * @param size register size. Has to be 32 or 64.
1837      * @param dst general purpose register. May not be null or stackpointer..
1838      * @param src1 general purpose register. May not be null or zero-register.
1839      * @param src2 general purpose register. May not be null or stackpointer.
1840      * @param extendType defines how src2 is extended to the same size as src1.
1841      * @param shiftAmt must be in range 0 to 4.
1842      */
1843     protected void adds(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1844         assert !dst.equals(sp);
1845         assert !src1.equals(zr);
1846         assert !src2.equals(sp);
1847         addSubExtendedInstruction(ADDS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1848     }
1849 
1850     /**
1851      * dst = src1 - extendType(src2) << imm.
1852      *
1853      * @param size register size. Has to be 32 or 64.
1854      * @param dst general purpose register. May not be null or zero-register..
1855      * @param src1 general purpose register. May not be null or zero-register.
1856      * @param src2 general purpose register. May not be null or stackpointer.
1857      * @param extendType defines how src2 is extended to the same size as src1.
1858      * @param shiftAmt must be in range 0 to 4.
1859      */
1860     protected void sub(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1861         assert !dst.equals(zr);
1862         assert !src1.equals(zr);
1863         assert !src2.equals(sp);
1864         addSubExtendedInstruction(SUB, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1865     }
1866 
1867     /**
1868      * dst = src1 - extendType(src2) << imm and sets flags.
1869      *
1870      * @param size register size. Has to be 32 or 64.
1871      * @param dst general purpose register. May not be null or stackpointer..
1872      * @param src1 general purpose register. May not be null or zero-register.
1873      * @param src2 general purpose register. May not be null or stackpointer.
1874      * @param extendType defines how src2 is extended to the same size as src1.
1875      * @param shiftAmt must be in range 0 to 4.
1876      */
1877     public void subs(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1878         assert !dst.equals(sp);
1879         assert !src1.equals(zr);
1880         assert !src2.equals(sp);
1881         addSubExtendedInstruction(SUBS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1882     }
1883 
1884     private void addSubExtendedInstruction(Instruction instr, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt, InstructionType type) {
1885         assert shiftAmt >= 0 && shiftAmt <= 4;
1886         emitInt(type.encoding | instr.encoding | AddSubExtendedOp | shiftAmt << ImmediateOffset | extendType.encoding << ExtendTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1887     }
1888 
1889     /* Logical (shifted register) (5.5.3) */
1890     /**
1891      * dst = src1 & shiftType(src2, imm).
1892      *
1893      * @param size register size. Has to be 32 or 64.
1894      * @param dst general purpose register. May not be null or stackpointer.
1895      * @param src1 general purpose register. May not be null or stackpointer.
1896      * @param src2 general purpose register. May not be null or stackpointer.
1897      * @param shiftType all types allowed, may not be null.
1898      * @param shiftAmt must be in range 0 to size - 1.
1899      */
1900     protected void and(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1901         logicalRegInstruction(AND, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1902     }
1903 
1904     /**
1905      * dst = src1 & shiftType(src2, imm) and sets condition flags.
1906      *
1907      * @param size register size. Has to be 32 or 64.
1908      * @param dst general purpose register. May not be null or stackpointer.
1909      * @param src1 general purpose register. May not be null or stackpointer.
1910      * @param src2 general purpose register. May not be null or stackpointer.
1911      * @param shiftType all types allowed, may not be null.
1912      * @param shiftAmt must be in range 0 to size - 1.
1913      */
1914     protected void ands(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1915         logicalRegInstruction(ANDS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1916     }
1917 
1918     /**
1919      * dst = src1 & ~(shiftType(src2, imm)).
1920      *
1921      * @param size register size. Has to be 32 or 64.
1922      * @param dst general purpose register. May not be null or stackpointer.
1923      * @param src1 general purpose register. May not be null or stackpointer.
1924      * @param src2 general purpose register. May not be null or stackpointer.
1925      * @param shiftType all types allowed, may not be null.
1926      * @param shiftAmt must be in range 0 to size - 1.
1927      */
1928     protected void bic(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1929         logicalRegInstruction(BIC, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1930     }
1931 
1932     /**
1933      * dst = src1 & ~(shiftType(src2, imm)) and sets condition flags.
1934      *
1935      * @param size register size. Has to be 32 or 64.
1936      * @param dst general purpose register. May not be null or stackpointer.
1937      * @param src1 general purpose register. May not be null or stackpointer.
1938      * @param src2 general purpose register. May not be null or stackpointer.
1939      * @param shiftType all types allowed, may not be null.
1940      * @param shiftAmt must be in range 0 to size - 1.
1941      */
1942     protected void bics(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1943         logicalRegInstruction(BICS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1944     }
1945 
1946     /**
1947      * dst = src1 ^ ~(shiftType(src2, imm)).
1948      *
1949      * @param size register size. Has to be 32 or 64.
1950      * @param dst general purpose register. May not be null or stackpointer.
1951      * @param src1 general purpose register. May not be null or stackpointer.
1952      * @param src2 general purpose register. May not be null or stackpointer.
1953      * @param shiftType all types allowed, may not be null.
1954      * @param shiftAmt must be in range 0 to size - 1.
1955      */
1956     protected void eon(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1957         logicalRegInstruction(EON, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1958     }
1959 
1960     /**
1961      * dst = src1 ^ shiftType(src2, imm).
1962      *
1963      * @param size register size. Has to be 32 or 64.
1964      * @param dst general purpose register. May not be null or stackpointer.
1965      * @param src1 general purpose register. May not be null or stackpointer.
1966      * @param src2 general purpose register. May not be null or stackpointer.
1967      * @param shiftType all types allowed, may not be null.
1968      * @param shiftAmt must be in range 0 to size - 1.
1969      */
1970     protected void eor(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1971         logicalRegInstruction(EOR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1972     }
1973 
1974     /**
1975      * dst = src1 | shiftType(src2, imm).
1976      *
1977      * @param size register size. Has to be 32 or 64.
1978      * @param dst general purpose register. May not be null or stackpointer.
1979      * @param src1 general purpose register. May not be null or stackpointer.
1980      * @param src2 general purpose register. May not be null or stackpointer.
1981      * @param shiftType all types allowed, may not be null.
1982      * @param shiftAmt must be in range 0 to size - 1.
1983      */
1984     protected void orr(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1985         logicalRegInstruction(ORR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1986     }
1987 
1988     /**
1989      * dst = src1 | ~(shiftType(src2, imm)).
1990      *
1991      * @param size register size. Has to be 32 or 64.
1992      * @param dst general purpose register. May not be null or stackpointer.
1993      * @param src1 general purpose register. May not be null or stackpointer.
1994      * @param src2 general purpose register. May not be null or stackpointer.
1995      * @param shiftType all types allowed, may not be null.
1996      * @param shiftAmt must be in range 0 to size - 1.
1997      */
1998     protected void orn(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1999         logicalRegInstruction(ORN, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
2000     }
2001 
2002     private void logicalRegInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt, InstructionType type) {
2003         assert !dst.equals(sp);
2004         assert !src1.equals(sp);
2005         assert !src2.equals(sp);
2006         assert shiftAmt >= 0 && shiftAmt < type.width;
2007         emitInt(type.encoding | instr.encoding | LogicalShiftOp | shiftAmt << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
2008     }
2009 
2010     /* Variable Shift (5.5.4) */
2011     /**
2012      * dst = src1 >> (src2 & log2(size)).
2013      *
2014      * @param size register size. Has to be 32 or 64.
2015      * @param dst general purpose register. May not be null or stackpointer.
2016      * @param src1 general purpose register. May not be null or stackpointer.
2017      * @param src2 general purpose register. May not be null or stackpointer.
2018      */
2019     protected void asr(int size, Register dst, Register src1, Register src2) {
2020         dataProcessing2SourceOp(ASRV, dst, src1, src2, generalFromSize(size));
2021     }
2022 
2023     /**
2024      * dst = src1 << (src2 & log2(size)).
2025      *
2026      * @param size register size. Has to be 32 or 64.
2027      * @param dst general purpose register. May not be null or stackpointer.
2028      * @param src1 general purpose register. May not be null or stackpointer.
2029      * @param src2 general purpose register. May not be null or stackpointer.
2030      */
2031     protected void lsl(int size, Register dst, Register src1, Register src2) {
2032         dataProcessing2SourceOp(LSLV, dst, src1, src2, generalFromSize(size));
2033     }
2034 
2035     /**
2036      * dst = src1 >>> (src2 & log2(size)).
2037      *
2038      * @param size register size. Has to be 32 or 64.
2039      * @param dst general purpose register. May not be null or stackpointer.
2040      * @param src1 general purpose register. May not be null or stackpointer.
2041      * @param src2 general purpose register. May not be null or stackpointer.
2042      */
2043     protected void lsr(int size, Register dst, Register src1, Register src2) {
2044         dataProcessing2SourceOp(LSRV, dst, src1, src2, generalFromSize(size));
2045     }
2046 
2047     /**
2048      * dst = rotateRight(src1, (src2 & log2(size))).
2049      *
2050      * @param size register size. Has to be 32 or 64.
2051      * @param dst general purpose register. May not be null or stackpointer.
2052      * @param src1 general purpose register. May not be null or stackpointer.
2053      * @param src2 general purpose register. May not be null or stackpointer.
2054      */
2055     protected void ror(int size, Register dst, Register src1, Register src2) {
2056         dataProcessing2SourceOp(RORV, dst, src1, src2, generalFromSize(size));
2057     }
2058 
2059     /* Bit Operations (5.5.5) */
2060 
2061     /**
2062      * Counts leading sign bits. Sets Wd to the number of consecutive bits following the topmost bit
2063      * in dst, that are the same as the topmost bit. The count does not include the topmost bit
2064      * itself , so the result will be in the range 0 to size-1 inclusive.
2065      *
2066      * @param size register size. Has to be 32 or 64.
2067      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
2068      * @param src source register. May not be null, zero-register or the stackpointer.
2069      */
2070     protected void cls(int size, Register dst, Register src) {
2071         dataProcessing1SourceOp(CLS, dst, src, generalFromSize(size));
2072     }
2073 
2074     /**
2075      * Counts leading zeros.
2076      *
2077      * @param size register size. Has to be 32 or 64.
2078      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
2079      * @param src source register. May not be null, zero-register or the stackpointer.
2080      */
2081     public void clz(int size, Register dst, Register src) {
2082         dataProcessing1SourceOp(CLZ, dst, src, generalFromSize(size));
2083     }
2084 
2085     /**
2086      * Reverses bits.
2087      *
2088      * @param size register size. Has to be 32 or 64.
2089      * @param dst general purpose register. May not be null, zero-register or the stackpointer.
2090      * @param src source register. May not be null, zero-register or the stackpointer.
2091      */
2092     public void rbit(int size, Register dst, Register src) {
2093         dataProcessing1SourceOp(RBIT, dst, src, generalFromSize(size));
2094     }
2095 
2096     /**
2097      * Reverses bytes.
2098      *
2099      * @param size register size. Has to be 32 or 64.
2100      * @param dst general purpose register. May not be null or the stackpointer.
2101      * @param src source register. May not be null or the stackpointer.
2102      */
2103     public void rev(int size, Register dst, Register src) {
2104         if (size == 64) {
2105             dataProcessing1SourceOp(REVX, dst, src, generalFromSize(size));
2106         } else {
2107             assert size == 32;
2108             dataProcessing1SourceOp(REVW, dst, src, generalFromSize(size));
2109         }
2110     }
2111 
2112     /* Conditional Data Processing (5.5.6) */
2113 
2114     /**
2115      * Conditional select. dst = src1 if condition else src2.
2116      *
2117      * @param size register size. Has to be 32 or 64.
2118      * @param dst general purpose register. May not be null or the stackpointer.
2119      * @param src1 general purpose register. May not be null or the stackpointer.
2120      * @param src2 general purpose register. May not be null or the stackpointer.
2121      * @param condition any condition flag. May not be null.
2122      */
2123     protected void csel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2124         conditionalSelectInstruction(CSEL, dst, src1, src2, condition, generalFromSize(size));
2125     }
2126 
2127     /**
2128      * Conditional select negate. dst = src1 if condition else -src2.
2129      *
2130      * @param size register size. Has to be 32 or 64.
2131      * @param dst general purpose register. May not be null or the stackpointer.
2132      * @param src1 general purpose register. May not be null or the stackpointer.
2133      * @param src2 general purpose register. May not be null or the stackpointer.
2134      * @param condition any condition flag. May not be null.
2135      */
2136     protected void csneg(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2137         conditionalSelectInstruction(CSNEG, dst, src1, src2, condition, generalFromSize(size));
2138     }
2139 
2140     /**
2141      * Conditional increase. dst = src1 if condition else src2 + 1.
2142      *
2143      * @param size register size. Has to be 32 or 64.
2144      * @param dst general purpose register. May not be null or the stackpointer.
2145      * @param src1 general purpose register. May not be null or the stackpointer.
2146      * @param src2 general purpose register. May not be null or the stackpointer.
2147      * @param condition any condition flag. May not be null.
2148      */
2149     protected void csinc(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2150         conditionalSelectInstruction(CSINC, dst, src1, src2, condition, generalFromSize(size));
2151     }
2152 
2153     private void conditionalSelectInstruction(Instruction instr, Register dst, Register src1, Register src2, ConditionFlag condition, InstructionType type) {
2154         assert !dst.equals(sp);
2155         assert !src1.equals(sp);
2156         assert !src2.equals(sp);
2157         emitInt(type.encoding | instr.encoding | ConditionalSelectOp | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset);
2158     }
2159 
2160     /* Integer Multiply/Divide (5.6) */
2161 
2162     /**
2163      * dst = src1 * src2 + src3.
2164      *
2165      * @param size register size. Has to be 32 or 64.
2166      * @param dst general purpose register. May not be null or the stackpointer.
2167      * @param src1 general purpose register. May not be null or the stackpointer.
2168      * @param src2 general purpose register. May not be null or the stackpointer.
2169      * @param src3 general purpose register. May not be null or the stackpointer.
2170      */
2171     protected void madd(int size, Register dst, Register src1, Register src2, Register src3) {
2172         mulInstruction(MADD, dst, src1, src2, src3, generalFromSize(size));
2173     }
2174 
2175     /**
2176      * dst = src3 - src1 * src2.
2177      *
2178      * @param size register size. Has to be 32 or 64.
2179      * @param dst general purpose register. May not be null or the stackpointer.
2180      * @param src1 general purpose register. May not be null or the stackpointer.
2181      * @param src2 general purpose register. May not be null or the stackpointer.
2182      * @param src3 general purpose register. May not be null or the stackpointer.
2183      */
2184     protected void msub(int size, Register dst, Register src1, Register src2, Register src3) {
2185         mulInstruction(MSUB, dst, src1, src2, src3, generalFromSize(size));
2186     }
2187 
2188     /**
2189      * Signed multiply high. dst = (src1 * src2)[127:64]
2190      *
2191      * @param dst general purpose register. May not be null or the stackpointer.
2192      * @param src1 general purpose register. May not be null or the stackpointer.
2193      * @param src2 general purpose register. May not be null or the stackpointer.
2194      */
2195     protected void smulh(Register dst, Register src1, Register src2) {
2196         assert !dst.equals(sp);
2197         assert !src1.equals(sp);
2198         assert !src2.equals(sp);
2199         emitInt(0b10011011010 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2200     }
2201 
2202     /**
2203      * unsigned multiply high. dst = (src1 * src2)[127:64]
2204      *
2205      * @param dst general purpose register. May not be null or the stackpointer.
2206      * @param src1 general purpose register. May not be null or the stackpointer.
2207      * @param src2 general purpose register. May not be null or the stackpointer.
2208      */
2209     protected void umulh(Register dst, Register src1, Register src2) {
2210         assert !dst.equals(sp);
2211         assert !src1.equals(sp);
2212         assert !src2.equals(sp);
2213         emitInt(0b10011011110 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2214     }
2215 
2216     /**
2217      * unsigned multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2)
2218      *
2219      * @param dst general purpose register. May not be null or the stackpointer.
2220      * @param src1 general purpose register. May not be null or the stackpointer.
2221      * @param src2 general purpose register. May not be null or the stackpointer.
2222      * @param src3 general purpose register. May not be null or the stackpointer.
2223      */
2224     protected void umaddl(Register dst, Register src1, Register src2, Register src3) {
2225         assert !dst.equals(sp);
2226         assert !src1.equals(sp);
2227         assert !src2.equals(sp);
2228         assert !src3.equals(sp);
2229         emitInt(0b10011011101 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2230     }
2231 
2232     /**
2233      * signed multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2)
2234      *
2235      * @param dst general purpose register. May not be null or the stackpointer.
2236      * @param src1 general purpose register. May not be null or the stackpointer.
2237      * @param src2 general purpose register. May not be null or the stackpointer.
2238      * @param src3 general purpose register. May not be null or the stackpointer.
2239      */
2240     public void smaddl(Register dst, Register src1, Register src2, Register src3) {
2241         assert !dst.equals(sp);
2242         assert !src1.equals(sp);
2243         assert !src2.equals(sp);
2244         assert !src3.equals(sp);
2245         emitInt(0b10011011001 << 21 | dst.encoding | rs1(src1) | rs2(src2) | rs3(src3));
2246     }
2247 
2248     private void mulInstruction(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) {
2249         assert !dst.equals(sp);
2250         assert !src1.equals(sp);
2251         assert !src2.equals(sp);
2252         assert !src3.equals(sp);
2253         emitInt(type.encoding | instr.encoding | MulOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3));
2254     }
2255 
2256     /**
2257      * Signed divide. dst = src1 / src2.
2258      *
2259      * @param size register size. Has to be 32 or 64.
2260      * @param dst general purpose register. May not be null or the stackpointer.
2261      * @param src1 general purpose register. May not be null or the stackpointer.
2262      * @param src2 general purpose register. May not be null or the stackpointer.
2263      */
2264     public void sdiv(int size, Register dst, Register src1, Register src2) {
2265         dataProcessing2SourceOp(SDIV, dst, src1, src2, generalFromSize(size));
2266     }
2267 
2268     /**
2269      * Unsigned divide. dst = src1 / src2.
2270      *
2271      * @param size register size. Has to be 32 or 64.
2272      * @param dst general purpose register. May not be null or the stackpointer.
2273      * @param src1 general purpose register. May not be null or the stackpointer.
2274      * @param src2 general purpose register. May not be null or the stackpointer.
2275      */
2276     public void udiv(int size, Register dst, Register src1, Register src2) {
2277         dataProcessing2SourceOp(UDIV, dst, src1, src2, generalFromSize(size));
2278     }
2279 
2280     private void dataProcessing1SourceOp(Instruction instr, Register dst, Register src, InstructionType type) {
2281         emitInt(type.encoding | instr.encoding | DataProcessing1SourceOp | rd(dst) | rs1(src));
2282     }
2283 
2284     private void dataProcessing2SourceOp(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) {
2285         assert !dst.equals(sp);
2286         assert !src1.equals(sp);
2287         assert !src2.equals(sp);
2288         emitInt(type.encoding | instr.encoding | DataProcessing2SourceOp | rd(dst) | rs1(src1) | rs2(src2));
2289     }
2290 
2291     /* Floating point operations */
2292 
2293     /* Load-Store Single FP register (5.7.1.1) */
2294     /**
2295      * Floating point load.
2296      *
2297      * @param size number of bits read from memory into rt. Must be 32 or 64.
2298      * @param rt floating point register. May not be null.
2299      * @param address all addressing modes allowed. May not be null.
2300      */
2301     public void fldr(int size, Register rt, AArch64Address address) {
2302         assert rt.getRegisterCategory().equals(SIMD);
2303         assert size == 32 || size == 64;
2304         int transferSize = NumUtil.log2Ceil(size / 8);
2305         loadStoreInstruction(LDR, rt, address, InstructionType.FP32, transferSize);
2306     }
2307 
2308     /**
2309      * Floating point store.
2310      *
2311      * @param size number of bits read from memory into rt. Must be 32 or 64.
2312      * @param rt floating point register. May not be null.
2313      * @param address all addressing modes allowed. May not be null.
2314      */
2315     public void fstr(int size, Register rt, AArch64Address address) {
2316         assert rt.getRegisterCategory().equals(SIMD);
2317         assert size == 32 || size == 64;
2318         int transferSize = NumUtil.log2Ceil(size / 8);
2319         loadStoreInstruction(STR, rt, address, InstructionType.FP64, transferSize);
2320     }
2321 
2322     /* Floating-point Move (register) (5.7.2) */
2323 
2324     /**
2325      * Floating point move.
2326      *
2327      * @param size register size. Has to be 32 or 64.
2328      * @param dst floating point register. May not be null.
2329      * @param src floating point register. May not be null.
2330      */
2331     protected void fmov(int size, Register dst, Register src) {
2332         fpDataProcessing1Source(FMOV, dst, src, floatFromSize(size));
2333     }
2334 
2335     /**
2336      * Move size bits from floating point register unchanged to general purpose register.
2337      *
2338      * @param size number of bits read from memory into rt. Must be 32 or 64.
2339      * @param dst general purpose register. May not be null, stack-pointer or zero-register
2340      * @param src floating point register. May not be null.
2341      */
2342     protected void fmovFpu2Cpu(int size, Register dst, Register src) {
2343         assert dst.getRegisterCategory().equals(CPU);
2344         assert src.getRegisterCategory().equals(SIMD);
2345         fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVFPU2CPU);
2346     }
2347 
2348     /**
2349      * Move size bits from general purpose register unchanged to floating point register.
2350      *
2351      * @param size register size. Has to be 32 or 64.
2352      * @param dst floating point register. May not be null.
2353      * @param src general purpose register. May not be null or stack-pointer.
2354      */
2355     protected void fmovCpu2Fpu(int size, Register dst, Register src) {
2356         assert dst.getRegisterCategory().equals(SIMD);
2357         assert src.getRegisterCategory().equals(CPU);
2358         fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVCPU2FPU);
2359     }
2360 
2361     private void fmovCpuFpuInstruction(Register dst, Register src, boolean is64bit, Instruction instr) {
2362         int sf = is64bit ? FP64.encoding | General64.encoding : FP32.encoding | General32.encoding;
2363         emitInt(sf | instr.encoding | FpConvertOp | rd(dst) | rs1(src));
2364     }
2365 
2366     /* Floating-point Move (immediate) (5.7.3) */
2367 
2368     /**
2369      * Move immediate into register.
2370      *
2371      * @param size register size. Has to be 32 or 64.
2372      * @param dst floating point register. May not be null.
2373      * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be
2374      *            loaded, i.e. (float) imm == imm must be true. In all cases
2375      *            {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true
2376      *            depending on size.
2377      */
2378     protected void fmov(int size, Register dst, double imm) {
2379         assert dst.getRegisterCategory().equals(SIMD);
2380         InstructionType type = floatFromSize(size);
2381         int immEncoding;
2382         if (type == FP64) {
2383             immEncoding = getDoubleImmediate(imm);
2384         } else {
2385             assert imm == (float) imm : "float mov must use an immediate that can be represented using a float.";
2386             immEncoding = getFloatImmediate((float) imm);
2387         }
2388         emitInt(type.encoding | FMOV.encoding | FpImmOp | immEncoding | rd(dst));
2389     }
2390 
2391     private static int getDoubleImmediate(double imm) {
2392         assert isDoubleImmediate(imm);
2393         // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
2394         // 0000.0000.0000.0000.0000.0000.0000.0000
2395         long repr = Double.doubleToRawLongBits(imm);
2396         int a = (int) (repr >>> 63) << 7;
2397         int b = (int) ((repr >>> 61) & 0x1) << 6;
2398         int cToH = (int) (repr >>> 48) & 0x3f;
2399         return (a | b | cToH) << FpImmOffset;
2400     }
2401 
2402     protected static boolean isDoubleImmediate(double imm) {
2403         // Valid values will have the form:
2404         // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
2405         // 0000.0000.0000.0000.0000.0000.0000.0000
2406         long bits = Double.doubleToRawLongBits(imm);
2407         // lower 48 bits are cleared
2408         if ((bits & NumUtil.getNbitNumberLong(48)) != 0) {
2409             return false;
2410         }
2411         // bits[61..54] are all set or all cleared.
2412         long pattern = (bits >> 54) & NumUtil.getNbitNumberLong(7);
2413         if (pattern != 0 && pattern != NumUtil.getNbitNumberLong(7)) {
2414             return false;
2415         }
2416         // bits[62] and bits[61] are opposites.
2417         return ((bits ^ (bits << 1)) & (1L << 62)) != 0;
2418     }
2419 
2420     private static int getFloatImmediate(float imm) {
2421         assert isFloatImmediate(imm);
2422         // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
2423         int repr = Float.floatToRawIntBits(imm);
2424         int a = (repr >>> 31) << 7;
2425         int b = ((repr >>> 29) & 0x1) << 6;
2426         int cToH = (repr >>> 19) & NumUtil.getNbitNumberInt(6);
2427         return (a | b | cToH) << FpImmOffset;
2428     }
2429 
2430     protected static boolean isFloatImmediate(float imm) {
2431         // Valid values will have the form:
2432         // aBbb.bbbc.defg.h000.0000.0000.0000.0000
2433         int bits = Float.floatToRawIntBits(imm);
2434         // lower 20 bits are cleared.
2435         if ((bits & NumUtil.getNbitNumberInt(19)) != 0) {
2436             return false;
2437         }
2438         // bits[29..25] are all set or all cleared
2439         int pattern = (bits >> 25) & NumUtil.getNbitNumberInt(5);
2440         if (pattern != 0 && pattern != NumUtil.getNbitNumberInt(5)) {
2441             return false;
2442         }
2443         // bits[29] and bits[30] have to be opposite
2444         return ((bits ^ (bits << 1)) & (1 << 30)) != 0;
2445     }
2446 
2447     /* Convert Floating-point Precision (5.7.4.1) */
2448     /* Converts float to double and vice-versa */
2449 
2450     /**
2451      * Convert float to double and vice-versa.
2452      *
2453      * @param srcSize size of source register in bits.
2454      * @param dst floating point register. May not be null.
2455      * @param src floating point register. May not be null.
2456      */
2457     public void fcvt(int srcSize, Register dst, Register src) {
2458         if (srcSize == 32) {
2459             fpDataProcessing1Source(FCVTDS, dst, src, floatFromSize(srcSize));
2460         } else {
2461             fpDataProcessing1Source(FCVTSD, dst, src, floatFromSize(srcSize));
2462         }
2463     }
2464 
2465     /* Convert to Integer (5.7.4.2) */
2466 
2467     /**
2468      * Convert floating point to integer. Rounds towards zero.
2469      *
2470      * @param targetSize size of integer register. 32 or 64.
2471      * @param srcSize size of floating point register. 32 or 64.
2472      * @param dst general purpose register. May not be null, the zero-register or the stackpointer.
2473      * @param src floating point register. May not be null.
2474      */
2475     public void fcvtzs(int targetSize, int srcSize, Register dst, Register src) {
2476         assert !dst.equals(zr) && !dst.equals(sp);
2477         assert src.getRegisterCategory().equals(SIMD);
2478         fcvtCpuFpuInstruction(FCVTZS, dst, src, generalFromSize(targetSize), floatFromSize(srcSize));
2479     }
2480 
2481     /* Convert from Integer (5.7.4.2) */
2482     /**
2483      * Converts integer to floating point. Uses rounding mode defined by FCPR.
2484      *
2485      * @param targetSize size of floating point register. 32 or 64.
2486      * @param srcSize size of integer register. 32 or 64.
2487      * @param dst floating point register. May not be null.
2488      * @param src general purpose register. May not be null or the stackpointer.
2489      */
2490     public void scvtf(int targetSize, int srcSize, Register dst, Register src) {
2491         assert dst.getRegisterCategory().equals(SIMD);
2492         assert !src.equals(sp);
2493         fcvtCpuFpuInstruction(SCVTF, dst, src, floatFromSize(targetSize), generalFromSize(srcSize));
2494     }
2495 
2496     private void fcvtCpuFpuInstruction(Instruction instr, Register dst, Register src, InstructionType type1, InstructionType type2) {
2497         emitInt(type1.encoding | type2.encoding | instr.encoding | FpConvertOp | rd(dst) | rs1(src));
2498     }
2499 
2500     /* Floating-point Round to Integral (5.7.5) */
2501 
2502     /**
2503      * Rounds floating-point to integral. Rounds towards zero.
2504      *
2505      * @param size register size.
2506      * @param dst floating point register. May not be null.
2507      * @param src floating point register. May not be null.
2508      */
2509     protected void frintz(int size, Register dst, Register src) {
2510         fpDataProcessing1Source(FRINTZ, dst, src, floatFromSize(size));
2511     }
2512 
2513     /**
2514      * Rounds floating-point to integral. Rounds towards nearest with ties to even.
2515      *
2516      * @param size register size.
2517      * @param dst floating point register. May not be null.
2518      * @param src floating point register. May not be null.
2519      */
2520     public void frintn(int size, Register dst, Register src) {
2521         fpDataProcessing1Source(FRINTN, dst, src, floatFromSize(size));
2522     }
2523 
2524     /**
2525      * Rounds floating-point to integral. Rounds towards minus infinity.
2526      *
2527      * @param size register size.
2528      * @param dst floating point register. May not be null.
2529      * @param src floating point register. May not be null.
2530      */
2531     public void frintm(int size, Register dst, Register src) {
2532         fpDataProcessing1Source(FRINTM, dst, src, floatFromSize(size));
2533     }
2534 
2535     /**
2536      * Rounds floating-point to integral. Rounds towards plus infinity.
2537      *
2538      * @param size register size.
2539      * @param dst floating point register. May not be null.
2540      * @param src floating point register. May not be null.
2541      */
2542     public void frintp(int size, Register dst, Register src) {
2543         fpDataProcessing1Source(FRINTP, dst, src, floatFromSize(size));
2544     }
2545 
2546     /* Floating-point Arithmetic (1 source) (5.7.6) */
2547 
2548     /**
2549      * dst = |src|.
2550      *
2551      * @param size register size.
2552      * @param dst floating point register. May not be null.
2553      * @param src floating point register. May not be null.
2554      */
2555     public void fabs(int size, Register dst, Register src) {
2556         fpDataProcessing1Source(FABS, dst, src, floatFromSize(size));
2557     }
2558 
2559     /**
2560      * dst = -neg.
2561      *
2562      * @param size register size.
2563      * @param dst floating point register. May not be null.
2564      * @param src floating point register. May not be null.
2565      */
2566     public void fneg(int size, Register dst, Register src) {
2567         fpDataProcessing1Source(FNEG, dst, src, floatFromSize(size));
2568     }
2569 
2570     /**
2571      * dst = Sqrt(src).
2572      *
2573      * @param size register size.
2574      * @param dst floating point register. May not be null.
2575      * @param src floating point register. May not be null.
2576      */
2577     public void fsqrt(int size, Register dst, Register src) {
2578         fpDataProcessing1Source(FSQRT, dst, src, floatFromSize(size));
2579     }
2580 
2581     private void fpDataProcessing1Source(Instruction instr, Register dst, Register src, InstructionType type) {
2582         assert dst.getRegisterCategory().equals(SIMD);
2583         assert src.getRegisterCategory().equals(SIMD);
2584         emitInt(type.encoding | instr.encoding | Fp1SourceOp | rd(dst) | rs1(src));
2585     }
2586 
2587     /* Floating-point Arithmetic (2 source) (5.7.7) */
2588 
2589     /**
2590      * dst = src1 + src2.
2591      *
2592      * @param size register size.
2593      * @param dst floating point register. May not be null.
2594      * @param src1 floating point register. May not be null.
2595      * @param src2 floating point register. May not be null.
2596      */
2597     public void fadd(int size, Register dst, Register src1, Register src2) {
2598         fpDataProcessing2Source(FADD, dst, src1, src2, floatFromSize(size));
2599     }
2600 
2601     /**
2602      * dst = src1 - src2.
2603      *
2604      * @param size register size.
2605      * @param dst floating point register. May not be null.
2606      * @param src1 floating point register. May not be null.
2607      * @param src2 floating point register. May not be null.
2608      */
2609     public void fsub(int size, Register dst, Register src1, Register src2) {
2610         fpDataProcessing2Source(FSUB, dst, src1, src2, floatFromSize(size));
2611     }
2612 
2613     /**
2614      * dst = src1 * src2.
2615      *
2616      * @param size register size.
2617      * @param dst floating point register. May not be null.
2618      * @param src1 floating point register. May not be null.
2619      * @param src2 floating point register. May not be null.
2620      */
2621     public void fmul(int size, Register dst, Register src1, Register src2) {
2622         fpDataProcessing2Source(FMUL, dst, src1, src2, floatFromSize(size));
2623     }
2624 
2625     /**
2626      * dst = src1 / src2.
2627      *
2628      * @param size register size.
2629      * @param dst floating point register. May not be null.
2630      * @param src1 floating point register. May not be null.
2631      * @param src2 floating point register. May not be null.
2632      */
2633     public void fdiv(int size, Register dst, Register src1, Register src2) {
2634         fpDataProcessing2Source(FDIV, dst, src1, src2, floatFromSize(size));
2635     }
2636 
2637     private void fpDataProcessing2Source(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) {
2638         assert dst.getRegisterCategory().equals(SIMD);
2639         assert src1.getRegisterCategory().equals(SIMD);
2640         assert src2.getRegisterCategory().equals(SIMD);
2641         emitInt(type.encoding | instr.encoding | Fp2SourceOp | rd(dst) | rs1(src1) | rs2(src2));
2642     }
2643 
2644     /* Floating-point Multiply-Add (5.7.9) */
2645 
2646     /**
2647      * dst = src1 * src2 + src3.
2648      *
2649      * @param size register size.
2650      * @param dst floating point register. May not be null.
2651      * @param src1 floating point register. May not be null.
2652      * @param src2 floating point register. May not be null.
2653      * @param src3 floating point register. May not be null.
2654      */
2655     protected void fmadd(int size, Register dst, Register src1, Register src2, Register src3) {
2656         fpDataProcessing3Source(FMADD, dst, src1, src2, src3, floatFromSize(size));
2657     }
2658 
2659     /**
2660      * dst = src3 - src1 * src2.
2661      *
2662      * @param size register size.
2663      * @param dst floating point register. May not be null.
2664      * @param src1 floating point register. May not be null.
2665      * @param src2 floating point register. May not be null.
2666      * @param src3 floating point register. May not be null.
2667      */
2668     protected void fmsub(int size, Register dst, Register src1, Register src2, Register src3) {
2669         fpDataProcessing3Source(FMSUB, dst, src1, src2, src3, floatFromSize(size));
2670     }
2671 
2672     private void fpDataProcessing3Source(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) {
2673         assert dst.getRegisterCategory().equals(SIMD);
2674         assert src1.getRegisterCategory().equals(SIMD);
2675         assert src2.getRegisterCategory().equals(SIMD);
2676         assert src3.getRegisterCategory().equals(SIMD);
2677         emitInt(type.encoding | instr.encoding | Fp3SourceOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3));
2678     }
2679 
2680     /* Floating-point Comparison (5.7.10) */
2681 
2682     /**
2683      * Compares src1 to src2.
2684      *
2685      * @param size register size.
2686      * @param src1 floating point register. May not be null.
2687      * @param src2 floating point register. May not be null.
2688      */
2689     public void fcmp(int size, Register src1, Register src2) {
2690         assert src1.getRegisterCategory().equals(SIMD);
2691         assert src2.getRegisterCategory().equals(SIMD);
2692         InstructionType type = floatFromSize(size);
2693         emitInt(type.encoding | FCMP.encoding | FpCmpOp | rs1(src1) | rs2(src2));
2694     }
2695 
2696     /**
2697      * Conditional compare. NZCV = fcmp(src1, src2) if condition else uimm4.
2698      *
2699      * @param size register size.
2700      * @param src1 floating point register. May not be null.
2701      * @param src2 floating point register. May not be null.
2702      * @param uimm4 condition flags that are used if condition is false.
2703      * @param condition every condition allowed. May not be null.
2704      */
2705     public void fccmp(int size, Register src1, Register src2, int uimm4, ConditionFlag condition) {
2706         assert NumUtil.isUnsignedNbit(4, uimm4);
2707         assert src1.getRegisterCategory().equals(SIMD);
2708         assert src2.getRegisterCategory().equals(SIMD);
2709         InstructionType type = floatFromSize(size);
2710         emitInt(type.encoding | FCCMP.encoding | uimm4 | condition.encoding << ConditionalConditionOffset | rs1(src1) | rs2(src2));
2711     }
2712 
2713     /**
2714      * Compare register to 0.0 .
2715      *
2716      * @param size register size.
2717      * @param src floating point register. May not be null.
2718      */
2719     public void fcmpZero(int size, Register src) {
2720         assert src.getRegisterCategory().equals(SIMD);
2721         InstructionType type = floatFromSize(size);
2722         emitInt(type.encoding | FCMPZERO.encoding | FpCmpOp | rs1(src));
2723     }
2724 
2725     /* Floating-point Conditional Select (5.7.11) */
2726 
2727     /**
2728      * Conditional select. dst = src1 if condition else src2.
2729      *
2730      * @param size register size.
2731      * @param dst floating point register. May not be null.
2732      * @param src1 floating point register. May not be null.
2733      * @param src2 floating point register. May not be null.
2734      * @param condition every condition allowed. May not be null.
2735      */
2736     protected void fcsel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2737         assert dst.getRegisterCategory().equals(SIMD);
2738         assert src1.getRegisterCategory().equals(SIMD);
2739         assert src2.getRegisterCategory().equals(SIMD);
2740         InstructionType type = floatFromSize(size);
2741         emitInt(type.encoding | FCSEL.encoding | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset);
2742     }
2743 
2744     /* Debug exceptions (5.9.1.2) */
2745 
2746     /**
2747      * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
2748      * UNALLOCATED instruction.
2749      *
2750      * @param uimm16 Arbitrary 16-bit unsigned payload.
2751      */
2752     protected void hlt(int uimm16) {
2753         exceptionInstruction(HLT, uimm16);
2754     }
2755 
2756     /**
2757      * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
2758      * exception level.
2759      *
2760      * @param uimm16 Arbitrary 16-bit unsigned payload.
2761      */
2762     protected void brk(int uimm16) {
2763         exceptionInstruction(BRK, uimm16);
2764     }
2765 
2766     private void exceptionInstruction(Instruction instr, int uimm16) {
2767         assert NumUtil.isUnsignedNbit(16, uimm16);
2768         emitInt(instr.encoding | ExceptionOp | uimm16 << SystemImmediateOffset);
2769     }
2770 
2771     /* Architectural hints (5.9.4) */
2772     public enum SystemHint {
2773         NOP(0x0),
2774         YIELD(0x1),
2775         WFE(0x2),
2776         WFI(0x3),
2777         SEV(0x4),
2778         SEVL(0x5),
2779         CSDB(0x14);
2780 
2781         private final int encoding;
2782 
2783         SystemHint(int encoding) {
2784             this.encoding = encoding;
2785         }
2786     }
2787 
2788     /**
2789      * Architectural hints.
2790      *
2791      * @param hint Can be any of the defined hints. May not be null.
2792      */
2793     protected void hint(SystemHint hint) {
2794         emitInt(HINT.encoding | hint.encoding << SystemImmediateOffset);
2795     }
2796 
2797     /**
2798      * Clear Exclusive: clears the local record of the executing processor that an address has had a
2799      * request for an exclusive access.
2800      */
2801     protected void clrex() {
2802         emitInt(CLREX.encoding);
2803     }
2804 
2805     /**
2806      * Possible barrier definitions for Aarch64. LOAD_LOAD and LOAD_STORE map to the same underlying
2807      * barrier.
2808      *
2809      * We only need synchronization across the inner shareable domain (see B2-90 in the Reference
2810      * documentation).
2811      */
2812     public enum BarrierKind {
2813         LOAD_LOAD(0x9, "ISHLD"),
2814         LOAD_STORE(0x9, "ISHLD"),
2815         STORE_STORE(0xA, "ISHST"),
2816         ANY_ANY(0xB, "ISH");
2817 
2818         public final int encoding;
2819         public final String optionName;
2820 
2821         BarrierKind(int encoding, String optionName) {
2822             this.encoding = encoding;
2823             this.optionName = optionName;
2824         }
2825     }
2826 
2827     /**
2828      * Data Memory Barrier.
2829      *
2830      * @param barrierKind barrier that is issued. May not be null.
2831      */
2832     public void dmb(BarrierKind barrierKind) {
2833         emitInt(DMB.encoding | BarrierOp | barrierKind.encoding << BarrierKindOffset);
2834     }
2835 
2836 }