
src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

8248238: Adding Windows support to OpenJDK on AArch64

Summary: LP64 vs LLP64 changes to add Windows support

Contributed-by: Monica Beckwith <monica.beckwith@microsoft.com>, Ludovic Henry <luhenry@microsoft.com>
Reviewed-by:
8248238: Adding Windows support to OpenJDK on AArch64

Summary: Adding Windows support for AArch64

Contributed-by: Ludovic Henry <luhenry@microsoft.com>, Monica Beckwith <monica.beckwith@microsoft.com>
Reviewed-by:


  53 #endif
  54 #ifdef COMPILER2
  55 #include "oops/oop.hpp"
  56 #include "opto/compile.hpp"
  57 #include "opto/node.hpp"
  58 #include "opto/output.hpp"
  59 #endif
  60 
  61 #ifdef PRODUCT
  62 #define BLOCK_COMMENT(str) /* nothing */
  63 #else
  64 #define BLOCK_COMMENT(str) block_comment(str)
  65 #endif
  66 #define STOP(str) stop(str);
  67 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  68 
  69 // Patch any kind of instruction; there may be several instructions.
  70 // Return the total length (in bytes) of the instructions.
  71 int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
  72   int instructions = 1;
  73   assert((uint64_t)target < (1ul << 48), "48-bit overflow in address constant");
  74   long offset = (target - branch) >> 2;
  75   unsigned insn = *(unsigned*)branch;
  76   if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
  77     // Load register (literal)
  78     Instruction_aarch64::spatch(branch, 23, 5, offset);
  79   } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
  80     // Unconditional branch (immediate)
  81     Instruction_aarch64::spatch(branch, 25, 0, offset);
  82   } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
  83     // Conditional branch (immediate)
  84     Instruction_aarch64::spatch(branch, 23, 5, offset);
  85   } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
  86     // Compare & branch (immediate)
  87     Instruction_aarch64::spatch(branch, 23, 5, offset);
  88   } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
  89     // Test & branch (immediate)
  90     Instruction_aarch64::spatch(branch, 18, 5, offset);
  91   } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
  92     // PC-rel. addressing
  93     offset = target-branch;
  94     int shift = Instruction_aarch64::extract(insn, 31, 31);
  95     if (shift) {
  96       u_int64_t dest = (u_int64_t)target;
  97       uint64_t pc_page = (uint64_t)branch >> 12;
  98       uint64_t adr_page = (uint64_t)target >> 12;
  99       unsigned offset_lo = dest & 0xfff;
 100       offset = adr_page - pc_page;
 101 
 102       // We handle 4 types of PC relative addressing
 103       //   1 - adrp    Rx, target_page
 104       //       ldr/str Ry, [Rx, #offset_in_page]
 105       //   2 - adrp    Rx, target_page
 106       //       add     Ry, Rx, #offset_in_page
 107       //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
 108       //       movk    Rx, #imm16<<32
 109       //   4 - adrp    Rx, target_page (page aligned reloc, offset == 0)
 110       // In the first 3 cases we must check that Rx is the same in the adrp and the
 111       // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end
 112       // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened
 113       // to be followed by a random unrelated ldr/str, add or movk instruction.
 114       //
 115       unsigned insn2 = ((unsigned*)branch)[1];
 116       if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
 117                 Instruction_aarch64::extract(insn, 4, 0) ==
 118                         Instruction_aarch64::extract(insn2, 9, 5)) {
 119         // Load/store register (unsigned immediate)
 120         unsigned size = Instruction_aarch64::extract(insn2, 31, 30);
 121         Instruction_aarch64::patch(branch + sizeof (unsigned),
 122                                     21, 10, offset_lo >> size);
 123         guarantee(((dest >> size) << size) == dest, "misaligned target");
 124         instructions = 2;
 125       } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
 126                 Instruction_aarch64::extract(insn, 4, 0) ==
 127                         Instruction_aarch64::extract(insn2, 4, 0)) {
 128         // add (immediate)
 129         Instruction_aarch64::patch(branch + sizeof (unsigned),
 130                                    21, 10, offset_lo);
 131         instructions = 2;
 132       } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
 133                    Instruction_aarch64::extract(insn, 4, 0) ==
 134                      Instruction_aarch64::extract(insn2, 4, 0)) {
 135         // movk #imm16<<32
 136         Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
 137         long dest = ((long)target & 0xffffffffL) | ((long)branch & 0xffff00000000L);
 138         long pc_page = (long)branch >> 12;
 139         long adr_page = (long)dest >> 12;
 140         offset = adr_page - pc_page;
 141         instructions = 2;
 142       }
 143     }
 144     int offset_lo = offset & 3;
 145     offset >>= 2;
 146     Instruction_aarch64::spatch(branch, 23, 5, offset);
 147     Instruction_aarch64::patch(branch, 30, 29, offset_lo);
 148   } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
 149     u_int64_t dest = (u_int64_t)target;
 150     // Move wide constant
 151     assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
 152     assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
 153     Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
 154     Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
 155     Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
 156     assert(target_addr_for_insn(branch) == target, "should be");
 157     instructions = 3;
 158   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
 159              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
 160     // nothing to do
 161     assert(target == 0, "did not expect to relocate target for polling page load");
 162   } else {
 163     ShouldNotReachHere();
 164   }
 165   return instructions * NativeInstruction::instruction_size;
 166 }
 167 
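// Editor's note: a minimal standalone sketch (not part of the patch) of the page-delta
// arithmetic used by the adrp/PC-relative patching above. The struct and helper names
// are hypothetical and only for illustration.
#include <cstdint>

struct AdrpParts {
  int64_t  page_delta;  // signed number of 4K pages between the adrp and the target
  uint32_t offset_lo;   // offset of the target within its 4K page
};

static AdrpParts adrp_parts(uint64_t branch_pc, uint64_t target) {
  AdrpParts p;
  p.page_delta = (int64_t)(target >> 12) - (int64_t)(branch_pc >> 12);  // adr_page - pc_page
  p.offset_lo  = (uint32_t)(target & 0xfff);                            // dest & 0xfff
  return p;
}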
 168 int MacroAssembler::patch_oop(address insn_addr, address o) {
 169   int instructions;


 188     Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
 189     instructions = 3;
 190   }
 191   return instructions * NativeInstruction::instruction_size;
 192 }
 193 
 194 int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
 195   // Metadata pointers are either narrow (32 bits) or wide (48 bits).
 196   // We encode narrow ones by setting the upper 16 bits in the first
 197   // instruction.
 198   NativeInstruction *insn = nativeInstruction_at(insn_addr);
 199   assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
 200          nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
 201 
 202   Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
 203   Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
 204   return 2 * NativeInstruction::instruction_size;
 205 }
 206 
 207 address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
 208   long offset = 0;
 209   if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
 210     // Load register (literal)
 211     offset = Instruction_aarch64::sextract(insn, 23, 5);
 212     return address(((uint64_t)insn_addr + (offset << 2)));
 213   } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
 214     // Unconditional branch (immediate)
 215     offset = Instruction_aarch64::sextract(insn, 25, 0);
 216   } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
 217     // Conditional branch (immediate)
 218     offset = Instruction_aarch64::sextract(insn, 23, 5);
 219   } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
 220     // Compare & branch (immediate)
 221     offset = Instruction_aarch64::sextract(insn, 23, 5);
 222    } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
 223     // Test & branch (immediate)
 224     offset = Instruction_aarch64::sextract(insn, 18, 5);
 225   } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
 226     // PC-rel. addressing
 227     offset = Instruction_aarch64::extract(insn, 30, 29);
 228     offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2;


 255         return address(target_page + (byte_offset << size));
 256       } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
 257                 Instruction_aarch64::extract(insn, 4, 0) ==
 258                         Instruction_aarch64::extract(insn2, 4, 0)) {
 259         // add (immediate)
 260         unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
 261         return address(target_page + byte_offset);
 262       } else {
 263         if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110  &&
 264                Instruction_aarch64::extract(insn, 4, 0) ==
 265                  Instruction_aarch64::extract(insn2, 4, 0)) {
 266           target_page = (target_page & 0xffffffff) |
 267                          ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32);
 268         }
 269         return (address)target_page;
 270       }
 271     } else {
 272       ShouldNotReachHere();
 273     }
 274   } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
 275     u_int32_t *insns = (u_int32_t *)insn_addr;
 276     // Move wide constant: movz, movk, movk.  See movptr().
 277     assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
 278     assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
 279     return address(u_int64_t(Instruction_aarch64::extract(insns[0], 20, 5))
 280                    + (u_int64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
 281                    + (u_int64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
 282   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
 283              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
 284     return 0;
 285   } else {
 286     ShouldNotReachHere();
 287   }
 288   return address(((uint64_t)insn_addr + (offset << 2)));
 289 }
 290 
 291 void MacroAssembler::safepoint_poll(Label& slow_path) {
 292   ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));
 293   tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
 294 }
 295 
 296 // Just like safepoint_poll, but use an acquiring load for thread-
 297 // local polling.
 298 //
 299 // We need an acquire here to ensure that any subsequent load of the
 300 // global SafepointSynchronize::_state flag is ordered after this load
 301 // of the local Thread::_polling page.  We don't want this poll to


 372 }
 373 
 374 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
 375                                          Register last_java_fp,
 376                                          Label &L,
 377                                          Register scratch) {
 378   if (L.is_bound()) {
 379     set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch);
 380   } else {
 381     InstructionMark im(this);
 382     L.add_patch_at(code(), locator());
 383     set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, scratch);
 384   }
 385 }
 386 
 387 void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
 388   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
 389   assert(CodeCache::find_blob(entry.target()) != NULL,
 390          "destination of far call not found in code cache");
 391   if (far_branches()) {
 392     unsigned long offset;
 393     // We can use ADRP here because we know that the total size of
 394     // the code cache cannot exceed 2Gb.
 395     adrp(tmp, entry, offset);
 396     add(tmp, tmp, offset);
 397     if (cbuf) cbuf->set_insts_mark();
 398     blr(tmp);
 399   } else {
 400     if (cbuf) cbuf->set_insts_mark();
 401     bl(entry);
 402   }
 403 }
 404 
 405 void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
 406   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
 407   assert(CodeCache::find_blob(entry.target()) != NULL,
 408          "destination of far call not found in code cache");
 409   if (far_branches()) {
 410     unsigned long offset;
 411     // We can use ADRP here because we know that the total size of
 412     // the code cache cannot exceed 2Gb.
 413     adrp(tmp, entry, offset);
 414     add(tmp, tmp, offset);
 415     if (cbuf) cbuf->set_insts_mark();
 416     br(tmp);
 417   } else {
 418     if (cbuf) cbuf->set_insts_mark();
 419     b(entry);
 420   }
 421 }
 422 
 423 void MacroAssembler::reserved_stack_check() {
 424     // testing if reserved zone needs to be enabled
 425     Label no_reserved_zone_enabling;
 426 
 427     ldr(rscratch1, Address(rthread, JavaThread::reserved_stack_activation_offset()));
 428     cmp(sp, rscratch1);
 429     br(Assembler::LO, no_reserved_zone_enabling);
 430 


 807   isb();
 808   mov_metadata(rmethod, (Metadata*)NULL);
 809 
 810   // Jump to the entry point of the i2c stub.
 811   movptr(rscratch1, 0);
 812   br(rscratch1);
 813 }
 814 
 815 void MacroAssembler::c2bool(Register x) {
 816   // implements x == 0 ? 0 : 1
 817   // note: must only look at least-significant byte of x
 818   //       since C-style booleans are stored in one byte
 819   //       only! (was bug)
 820   tst(x, 0xff);
 821   cset(x, Assembler::NE);
 822 }
 823 
 824 address MacroAssembler::ic_call(address entry, jint method_index) {
 825   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
 826   // address const_ptr = long_constant((jlong)Universe::non_oop_word());
 827   // unsigned long offset;
 828   // ldr_constant(rscratch2, const_ptr);
 829   movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
 830   return trampoline_call(Address(entry, rh));
 831 }
 832 
 833 // Implementation of call_VM versions
 834 
 835 void MacroAssembler::call_VM(Register oop_result,
 836                              address entry_point,
 837                              bool check_exceptions) {
 838   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 839 }
 840 
 841 void MacroAssembler::call_VM(Register oop_result,
 842                              address entry_point,
 843                              Register arg_1,
 844                              bool check_exceptions) {
 845   pass_arg1(this, arg_1);
 846   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 847 }


1474   MacroAssembler::call_VM_leaf_base(entry_point, 4);
1475 }
1476 
1477 void MacroAssembler::null_check(Register reg, int offset) {
1478   if (needs_explicit_null_check(offset)) {
1479     // provoke OS NULL exception if reg = NULL by
1480     // accessing M[reg] w/o changing any registers
1481     // NOTE: this is plenty to provoke a segv
1482     ldr(zr, Address(reg));
1483   } else {
1484     // nothing to do, (later) access of M[reg + offset]
1485     // will provoke OS NULL exception if reg = NULL
1486   }
1487 }
1488 
1489 // MacroAssembler protected routines needed to implement
1490 // public methods
1491 
1492 void MacroAssembler::mov(Register r, Address dest) {
1493   code_section()->relocate(pc(), dest.rspec());
1494   u_int64_t imm64 = (u_int64_t)dest.target();
1495   movptr(r, imm64);
1496 }
1497 
1498 // Move a constant pointer into r.  In AArch64 mode the virtual
1499 // address space is 48 bits in size, so we only need three
1500 // instructions to create a patchable instruction sequence that can
1501 // reach anywhere.
1502 void MacroAssembler::movptr(Register r, uintptr_t imm64) {
1503 #ifndef PRODUCT
1504   {
1505     char buffer[64];
1506     snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
1507     block_comment(buffer);
1508   }
1509 #endif
1510   assert(imm64 < (1ul << 48), "48-bit overflow in address constant");
1511   movz(r, imm64 & 0xffff);
1512   imm64 >>= 16;
1513   movk(r, imm64 & 0xffff, 16);
1514   imm64 >>= 16;
1515   movk(r, imm64 & 0xffff, 32);
1516 }
1517 
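// Editor's note: a small sketch (not part of the patch) of how a 48-bit address splits
// into the three 16-bit halfwords that movptr() above materializes with movz/movk/movk.
// The helper name is hypothetical.
#include <cstdint>
#include <cassert>

static void split48(uint64_t imm64, uint16_t half[3]) {
  assert(imm64 < ((uint64_t)1 << 48));            // same 48-bit limit as movptr()
  half[0] = (uint16_t)(imm64 & 0xffff);           // movz r, half[0]
  half[1] = (uint16_t)((imm64 >> 16) & 0xffff);   // movk r, half[1], lsl #16
  half[2] = (uint16_t)((imm64 >> 32) & 0xffff);   // movk r, half[2], lsl #32
}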
1518 // Macro to mov replicated immediate to vector register.
1519 //  Vd will get the following values for different arrangements in T
1520 //   imm32 == hex 000000gh  T8B:  Vd = ghghghghghghghgh
1521 //   imm32 == hex 000000gh  T16B: Vd = ghghghghghghghghghghghghghghghgh
1522 //   imm32 == hex 0000efgh  T4H:  Vd = efghefghefghefgh
1523 //   imm32 == hex 0000efgh  T8H:  Vd = efghefghefghefghefghefghefghefgh
1524 //   imm32 == hex abcdefgh  T2S:  Vd = abcdefghabcdefgh
1525 //   imm32 == hex abcdefgh  T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
1526 //   T1D/T2D: invalid
1527 void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
1528   assert(T != T1D && T != T2D, "invalid arrangement");
1529   if (T == T8B || T == T16B) {
1530     assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
1531     movi(Vd, T, imm32 & 0xff, 0);
1532     return;
1533   }
1534   u_int32_t nimm32 = ~imm32;
1535   if (T == T4H || T == T8H) {
1536     assert((imm32  & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
1537     imm32 &= 0xffff;
1538     nimm32 &= 0xffff;
1539   }
1540   u_int32_t x = imm32;
1541   int movi_cnt = 0;
1542   int movn_cnt = 0;
1543   while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
1544   x = nimm32;
1545   while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
1546   if (movn_cnt < movi_cnt) imm32 = nimm32;
1547   unsigned lsl = 0;
1548   while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1549   if (movn_cnt < movi_cnt)
1550     mvni(Vd, T, imm32 & 0xff, lsl);
1551   else
1552     movi(Vd, T, imm32 & 0xff, lsl);
1553   imm32 >>= 8; lsl += 8;
1554   while (imm32) {
1555     while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1556     if (movn_cnt < movi_cnt)
1557       bici(Vd, T, imm32 & 0xff, lsl);
1558     else
1559       orri(Vd, T, imm32 & 0xff, lsl);
1560     lsl += 8; imm32 >>= 8;
1561   }
1562 }
1563 
1564 void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
1565 {
1566 #ifndef PRODUCT
1567   {
1568     char buffer[64];
1569     snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
1570     block_comment(buffer);
1571   }
1572 #endif
1573   if (operand_valid_for_logical_immediate(false, imm64)) {
1574     orr(dst, zr, imm64);
1575   } else {
1576     // we can use a combination of MOVZ or MOVN with
1577     // MOVK to build up the constant
1578     u_int64_t imm_h[4];
1579     int zero_count = 0;
1580     int neg_count = 0;
1581     int i;
1582     for (i = 0; i < 4; i++) {
1583       imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
1584       if (imm_h[i] == 0) {
1585         zero_count++;
1586       } else if (imm_h[i] == 0xffffL) {
1587         neg_count++;
1588       }
1589     }
1590     if (zero_count == 4) {
1591       // one MOVZ will do
1592       movz(dst, 0);
1593     } else if (neg_count == 4) {
1594       // one MOVN will do
1595       movn(dst, 0);
1596     } else if (zero_count == 3) {
1597       for (i = 0; i < 4; i++) {
1598         if (imm_h[i] != 0L) {
1599           movz(dst, (u_int32_t)imm_h[i], (i << 4));
1600           break;
1601         }
1602       }
1603     } else if (neg_count == 3) {
1604       // one MOVN will do
1605       for (int i = 0; i < 4; i++) {
1606         if (imm_h[i] != 0xffffL) {
1607           movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
1608           break;
1609         }
1610       }
1611     } else if (zero_count == 2) {
1612       // one MOVZ and one MOVK will do
1613       for (i = 0; i < 3; i++) {
1614         if (imm_h[i] != 0L) {
1615           movz(dst, (u_int32_t)imm_h[i], (i << 4));
1616           i++;
1617           break;
1618         }
1619       }
1620       for (;i < 4; i++) {
1621         if (imm_h[i] != 0L) {
1622           movk(dst, (u_int32_t)imm_h[i], (i << 4));
1623         }
1624       }
1625     } else if (neg_count == 2) {
1626       // one MOVN and one MOVK will do
1627       for (i = 0; i < 4; i++) {
1628         if (imm_h[i] != 0xffffL) {
1629           movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
1630           i++;
1631           break;
1632         }
1633       }
1634       for (;i < 4; i++) {
1635         if (imm_h[i] != 0xffffL) {
1636           movk(dst, (u_int32_t)imm_h[i], (i << 4));
1637         }
1638       }
1639     } else if (zero_count == 1) {
1640       // one MOVZ and two MOVKs will do
1641       for (i = 0; i < 4; i++) {
1642         if (imm_h[i] != 0L) {
1643           movz(dst, (u_int32_t)imm_h[i], (i << 4));
1644           i++;
1645           break;
1646         }
1647       }
1648       for (;i < 4; i++) {
1649         if (imm_h[i] != 0x0L) {
1650           movk(dst, (u_int32_t)imm_h[i], (i << 4));
1651         }
1652       }
1653     } else if (neg_count == 1) {
1654       // one MOVN and two MOVKs will do
1655       for (i = 0; i < 4; i++) {
1656         if (imm_h[i] != 0xffffL) {
1657           movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
1658           i++;
1659           break;
1660         }
1661       }
1662       for (;i < 4; i++) {
1663         if (imm_h[i] != 0xffffL) {
1664           movk(dst, (u_int32_t)imm_h[i], (i << 4));
1665         }
1666       }
1667     } else {
1668       // use a MOVZ and 3 MOVKs (makes it easier to debug)
1669       movz(dst, (u_int32_t)imm_h[0], 0);
1670       for (i = 1; i < 4; i++) {
1671         movk(dst, (u_int32_t)imm_h[i], (i << 4));
1672       }
1673     }
1674   }
1675 }
1676 
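// Editor's note: an illustrative sketch (not part of the patch) of the halfword counting
// that mov_immediate64() above uses to choose between a MOVZ-based and a MOVN-based
// sequence. The helper name is hypothetical.
#include <cstdint>

static bool prefer_movn(uint64_t imm64) {
  int zero_count = 0, neg_count = 0;
  for (int i = 0; i < 4; i++) {
    uint16_t h = (uint16_t)(imm64 >> (i * 16));
    if (h == 0x0000) zero_count++;
    if (h == 0xffff) neg_count++;
  }
  // More all-ones halfwords than all-zero halfwords: start from MOVN and patch the
  // remaining halfwords with MOVK; otherwise start from MOVZ.
  return neg_count > zero_count;
}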
1677 void MacroAssembler::mov_immediate32(Register dst, u_int32_t imm32)
1678 {
1679 #ifndef PRODUCT
1680     {
1681       char buffer[64];
1682       snprintf(buffer, sizeof(buffer), "0x%" PRIX32, imm32);
1683       block_comment(buffer);
1684     }
1685 #endif
1686   if (operand_valid_for_logical_immediate(true, imm32)) {
1687     orrw(dst, zr, imm32);
1688   } else {
1689     // we can use MOVZ, MOVN or two calls to MOVK to build up the
1690     // constant
1691     u_int32_t imm_h[2];
1692     imm_h[0] = imm32 & 0xffff;
1693     imm_h[1] = ((imm32 >> 16) & 0xffff);
1694     if (imm_h[0] == 0) {
1695       movzw(dst, imm_h[1], 16);
1696     } else if (imm_h[0] == 0xffff) {
1697       movnw(dst, imm_h[1] ^ 0xffff, 16);
1698     } else if (imm_h[1] == 0) {
1699       movzw(dst, imm_h[0], 0);
1700     } else if (imm_h[1] == 0xffff) {
1701       movnw(dst, imm_h[0] ^ 0xffff, 0);
1702     } else {
1703       // use a MOVZ and MOVK (makes it easier to debug)
1704       movzw(dst, imm_h[0], 0);
1705       movkw(dst, imm_h[1], 16);
1706     }
1707   }
1708 }
1709 
1710 // Form an address from base + offset in Rd.  Rd may or may
1711 // not actually be used: you must use the Address that is returned.
1712 // It is up to you to ensure that the shift provided matches the size
1713 // of your data.
1714 Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) {
1715   if (Address::offset_ok_for_immed(byte_offset, shift))
1716     // It fits; no need for any heroics
1717     return Address(base, byte_offset);
1718 
1719   // Don't do anything clever with negative or misaligned offsets
1720   unsigned mask = (1 << shift) - 1;
1721   if (byte_offset < 0 || byte_offset & mask) {
1722     mov(Rd, byte_offset);
1723     add(Rd, base, Rd);
1724     return Address(Rd);
1725   }
1726 
1727   // See if we can do this with two 12-bit offsets
1728   {
1729     unsigned long word_offset = byte_offset >> shift;
1730     unsigned long masked_offset = word_offset & 0xfff000;
1731     if (Address::offset_ok_for_immed(word_offset - masked_offset, 0)
1732         && Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
1733       add(Rd, base, masked_offset << shift);
1734       word_offset -= masked_offset;
1735       return Address(Rd, word_offset << shift);
1736     }
1737   }
1738 
1739   // Do it the hard way
1740   mov(Rd, byte_offset);
1741   add(Rd, base, Rd);
1742   return Address(Rd);
1743 }
1744 
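// Editor's note: a sketch (not part of the patch) of the two-instruction path in
// form_address() above, which splits a scaled offset into a part that fits an ADD
// immediate and a remaining 12-bit load/store offset. Names are hypothetical.
#include <cstdint>

struct SplitOffset {
  uint64_t add_imm;   // byte offset added to the base register first (ADD immediate)
  uint64_t ldst_imm;  // remaining byte offset used in the load/store itself
};

static SplitOffset split_offset(uint64_t word_offset, int shift) {
  SplitOffset s;
  uint64_t masked = word_offset & 0xfff000;       // bits [23:12] of the word offset
  s.add_imm  = masked << shift;                   // scaled back to a byte offset
  s.ldst_imm = (word_offset - masked) << shift;   // must fit the unsigned 12-bit field
  return s;
}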
1745 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
1746   if (UseLSE) {
1747     mov(tmp, 1);
1748     ldadd(Assembler::word, tmp, zr, counter_addr);
1749     return;
1750   }


1951 
1952 void MacroAssembler::decrementw(Register reg, int value)
1953 {
1954   if (value < 0)  { incrementw(reg, -value);      return; }
1955   if (value == 0) {                               return; }
1956   if (value < (1 << 12)) { subw(reg, reg, value); return; }
1957   /* else */ {
1958     guarantee(reg != rscratch2, "invalid dst for register decrement");
1959     movw(rscratch2, (unsigned)value);
1960     subw(reg, reg, rscratch2);
1961   }
1962 }
1963 
1964 void MacroAssembler::decrement(Register reg, int value)
1965 {
1966   if (value < 0)  { increment(reg, -value);      return; }
1967   if (value == 0) {                              return; }
1968   if (value < (1 << 12)) { sub(reg, reg, value); return; }
1969   /* else */ {
1970     assert(reg != rscratch2, "invalid dst for register decrement");
1971     mov(rscratch2, (unsigned long)value);
1972     sub(reg, reg, rscratch2);
1973   }
1974 }
1975 
1976 void MacroAssembler::decrementw(Address dst, int value)
1977 {
1978   assert(!dst.uses(rscratch1), "invalid dst for address decrement");
1979   if (dst.getMode() == Address::literal) {
1980     assert(abs(value) < (1 << 12), "invalid value and address mode combination");
1981     lea(rscratch2, dst);
1982     dst = Address(rscratch2);
1983   }
1984   ldrw(rscratch1, dst);
1985   decrementw(rscratch1, value);
1986   strw(rscratch1, dst);
1987 }
1988 
1989 void MacroAssembler::decrement(Address dst, int value)
1990 {
1991   assert(!dst.uses(rscratch1), "invalid address for decrement");


2583       tty->print_cr("r19 = 0x%016lx", regs[19]);
2584       tty->print_cr("r20 = 0x%016lx", regs[20]);
2585       tty->print_cr("r21 = 0x%016lx", regs[21]);
2586       tty->print_cr("r22 = 0x%016lx", regs[22]);
2587       tty->print_cr("r23 = 0x%016lx", regs[23]);
2588       tty->print_cr("r24 = 0x%016lx", regs[24]);
2589       tty->print_cr("r25 = 0x%016lx", regs[25]);
2590       tty->print_cr("r26 = 0x%016lx", regs[26]);
2591       tty->print_cr("r27 = 0x%016lx", regs[27]);
2592       tty->print_cr("r28 = 0x%016lx", regs[28]);
2593       tty->print_cr("r30 = 0x%016lx", regs[30]);
2594       tty->print_cr("r31 = 0x%016lx", regs[31]);
2595       BREAKPOINT;
2596     }
2597   }
2598   fatal("DEBUG MESSAGE: %s", msg);
2599 }
2600 
2601 void MacroAssembler::push_call_clobbered_registers() {
2602   int step = 4 * wordSize;
2603   push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2604   sub(sp, sp, step);
2605   mov(rscratch1, -step);
2606   // Push v0-v7, v16-v31.
2607   for (int i = 31; i>= 4; i -= 4) {
2608     if (i <= v7->encoding() || i >= v16->encoding())
2609       st1(as_FloatRegister(i-3), as_FloatRegister(i-2), as_FloatRegister(i-1),
2610           as_FloatRegister(i), T1D, Address(post(sp, rscratch1)));
2611   }
2612   st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2),
2613       as_FloatRegister(3), T1D, Address(sp));
2614 }
2615 
2616 void MacroAssembler::pop_call_clobbered_registers() {
2617   for (int i = 0; i < 32; i += 4) {
2618     if (i <= v7->encoding() || i >= v16->encoding())
2619       ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2620           as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
2621   }
2622 
2623   pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2624 }
2625 
2626 void MacroAssembler::push_CPU_state(bool save_vectors) {
2627   int step = (save_vectors ? 8 : 4) * wordSize;
2628   push(0x3fffffff, sp);         // integer registers except lr & sp
2629   mov(rscratch1, -step);
2630   sub(sp, sp, step);
2631   for (int i = 28; i >= 4; i -= 4) {
2632     st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2633         as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
2634   }
2635   st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
2636 }
2637 
2638 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
2639   int step = (restore_vectors ? 8 : 4) * wordSize;
2640   for (int i = 0; i <= 28; i += 4)
2641     ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2642         as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
2643   pop(0x3fffffff, sp);         // integer registers except lr & sp


2675   Register base = sp;
2676   if ((offset & (size-1)) && offset >= (1<<8)) {
2677     add(tmp, base, offset & ((1<<12)-1));
2678     base = tmp;
2679     offset &= -1u<<12;
2680   }
2681 
2682   if (offset >= (1<<12) * size) {
2683     add(tmp, base, offset & (((1<<12)-1)<<12));
2684     base = tmp;
2685     offset &= ~(((1<<12)-1)<<12);
2686   }
2687 
2688   return Address(base, offset);
2689 }
2690 
2691 // Checks whether offset is aligned.
2692 // Returns true if it is, else false.
2693 bool MacroAssembler::merge_alignment_check(Register base,
2694                                            size_t size,
2695                                            long cur_offset,
2696                                            long prev_offset) const {
2697   if (AvoidUnalignedAccesses) {
2698     if (base == sp) {
 2699       // Checks whether the low offset is aligned to a pair of registers.
2700       long pair_mask = size * 2 - 1;
2701       long offset = prev_offset > cur_offset ? cur_offset : prev_offset;
2702       return (offset & pair_mask) == 0;
2703     } else { // If base is not sp, we can't guarantee the access is aligned.
2704       return false;
2705     }
2706   } else {
2707     long mask = size - 1;
2708     // Load/store pair instruction only supports element size aligned offset.
2709     return (cur_offset & mask) == 0 && (prev_offset & mask) == 0;
2710   }
2711 }
2712 
2713 // Checks whether current and previous loads/stores can be merged.
2714 // Returns true if it can be merged, else false.
2715 bool MacroAssembler::ldst_can_merge(Register rt,
2716                                     const Address &adr,
2717                                     size_t cur_size_in_bytes,
2718                                     bool is_store) const {
2719   address prev = pc() - NativeInstruction::instruction_size;
2720   address last = code()->last_insn();
2721 
2722   if (last == NULL || !nativeInstruction_at(last)->is_Imm_LdSt()) {
2723     return false;
2724   }
2725 
2726   if (adr.getMode() != Address::base_plus_offset || prev != last) {
2727     return false;
2728   }
2729 
2730   NativeLdSt* prev_ldst = NativeLdSt_at(prev);
2731   size_t prev_size_in_bytes = prev_ldst->size_in_bytes();
2732 
2733   assert(prev_size_in_bytes == 4 || prev_size_in_bytes == 8, "only supports 64/32bit merging.");
2734   assert(cur_size_in_bytes == 4 || cur_size_in_bytes == 8, "only supports 64/32bit merging.");
2735 
2736   if (cur_size_in_bytes != prev_size_in_bytes || is_store != prev_ldst->is_store()) {
2737     return false;
2738   }
2739 
2740   long max_offset = 63 * prev_size_in_bytes;
2741   long min_offset = -64 * prev_size_in_bytes;
2742 
2743   assert(prev_ldst->is_not_pre_post_index(), "pre-index or post-index is not supported to be merged.");
2744 
2745   // Only same base can be merged.
2746   if (adr.base() != prev_ldst->base()) {
2747     return false;
2748   }
2749 
2750   long cur_offset = adr.offset();
2751   long prev_offset = prev_ldst->offset();
2752   size_t diff = abs(cur_offset - prev_offset);
2753   if (diff != prev_size_in_bytes) {
2754     return false;
2755   }
2756 
2757   // Following cases can not be merged:
2758   // ldr x2, [x2, #8]
2759   // ldr x3, [x2, #16]
2760   // or:
2761   // ldr x2, [x3, #8]
2762   // ldr x2, [x3, #16]
 2763   // If t1 and t2 are the same in "ldp t1, t2, [xn, #imm]", we'll get SIGILL.
2764   if (!is_store && (adr.base() == prev_ldst->target() || rt == prev_ldst->target())) {
2765     return false;
2766   }
2767 
2768   long low_offset = prev_offset > cur_offset ? cur_offset : prev_offset;
2769   // Offset range must be in ldp/stp instruction's range.
2770   if (low_offset > max_offset || low_offset < min_offset) {
2771     return false;
2772   }
2773 
2774   if (merge_alignment_check(adr.base(), prev_size_in_bytes, cur_offset, prev_offset)) {
2775     return true;
2776   }
2777 
2778   return false;
2779 }
2780 
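// Editor's note: an illustrative sketch (not part of the patch) of the adjacency and
// ldp/stp range checks applied by ldst_can_merge() above. The helper name is hypothetical.
#include <cstdint>

static bool offsets_can_pair(int64_t cur_offset, int64_t prev_offset, int64_t size_in_bytes) {
  // The two accesses must be exactly one element apart.
  int64_t diff = cur_offset - prev_offset;
  if (diff < 0) diff = -diff;
  if (diff != size_in_bytes) return false;
  // The lower offset must lie in the signed 7-bit scaled range of ldp/stp.
  int64_t low = cur_offset < prev_offset ? cur_offset : prev_offset;
  return low >= -64 * size_in_bytes && low <= 63 * size_in_bytes;
}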
2781 // Merge current load/store with previous load/store into ldp/stp.
2782 void MacroAssembler::merge_ldst(Register rt,
2783                                 const Address &adr,
2784                                 size_t cur_size_in_bytes,
2785                                 bool is_store) {
2786 
2787   assert(ldst_can_merge(rt, adr, cur_size_in_bytes, is_store) == true, "cur and prev must be able to be merged.");
2788 
2789   Register rt_low, rt_high;
2790   address prev = pc() - NativeInstruction::instruction_size;
2791   NativeLdSt* prev_ldst = NativeLdSt_at(prev);
2792 
2793   long offset;
2794 
2795   if (adr.offset() < prev_ldst->offset()) {
2796     offset = adr.offset();
2797     rt_low = rt;
2798     rt_high = prev_ldst->target();
2799   } else {
2800     offset = prev_ldst->offset();
2801     rt_low = prev_ldst->target();
2802     rt_high = rt;
2803   }
2804 
2805   Address adr_p = Address(prev_ldst->base(), offset);
2806   // Overwrite previous generated binary.
2807   code_section()->set_end(prev);
2808 
2809   const int sz = prev_ldst->size_in_bytes();
2810   assert(sz == 8 || sz == 4, "only supports 64/32bit merging.");
2811   if (!is_store) {
2812     BLOCK_COMMENT("merged ldr pair");
2813     if (sz == 8) {


3020  *
3021  */
3022 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3023                                      Register z, Register zlen,
3024                                      Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3025                                      Register tmp5, Register tmp6, Register product_hi) {
3026 
3027   assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3028 
3029   const Register idx = tmp1;
3030   const Register kdx = tmp2;
3031   const Register xstart = tmp3;
3032 
3033   const Register y_idx = tmp4;
3034   const Register carry = tmp5;
3035   const Register product  = xlen;
3036   const Register x_xstart = zlen;  // reuse register
3037 
3038   // First Loop.
3039   //
3040   //  final static long LONG_MASK = 0xffffffffL;
3041   //  int xstart = xlen - 1;
3042   //  int ystart = ylen - 1;
3043   //  long carry = 0;
 3044   //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3045   //    long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
3046   //    z[kdx] = (int)product;
3047   //    carry = product >>> 32;
3048   //  }
3049   //  z[xstart] = (int)carry;
3050   //
3051 
3052   movw(idx, ylen);      // idx = ylen;
3053   movw(kdx, zlen);      // kdx = xlen+ylen;
3054   mov(carry, zr);       // carry = 0;
3055 
3056   Label L_done;
3057 
3058   movw(xstart, xlen);
3059   subsw(xstart, xstart, 1);
3060   br(Assembler::MI, L_done);
3061 
3062   multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3063 
3064   Label L_second_loop;
3065   cbzw(kdx, L_second_loop);
3066 
3067   Label L_carry;
3068   subw(kdx, kdx, 1);
3069   cbzw(kdx, L_carry);
3070 
3071   strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
3072   lsr(carry, carry, 32);
3073   subw(kdx, kdx, 1);
3074 
3075   bind(L_carry);
3076   strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
3077 
3078   // Second and third (nested) loops.
3079   //
3080   // for (int i = xstart-1; i >= 0; i--) { // Second loop
3081   //   carry = 0;
3082   //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3083   //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3084   //                    (z[k] & LONG_MASK) + carry;
3085   //     z[k] = (int)product;
3086   //     carry = product >>> 32;
3087   //   }
3088   //   z[i] = (int)carry;
3089   // }
3090   //
3091   // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3092 
3093   const Register jdx = tmp1;
3094 
3095   bind(L_second_loop);
3096   mov(carry, zr);                // carry = 0;
3097   movw(jdx, ylen);               // j = ystart+1
3098 
3099   subsw(xstart, xstart, 1);      // i = xstart-1;
3100   br(Assembler::MI, L_done);
3101 
3102   str(z, Address(pre(sp, -4 * wordSize)));
3103 


3319 
3320     sub(len, len, 64);
3321     add(buf, buf, 8);
3322     cmn(len, 128);
3323     br(Assembler::NE, CRC_less64);
3324   BIND(L_exit);
3325     mvnw(crc, crc);
3326 }
3327 
3328 /**
3329  * @param crc   register containing existing CRC (32-bit)
3330  * @param buf   register pointing to input byte buffer (byte*)
3331  * @param len   register containing number of bytes
3332  * @param table register that will contain address of CRC table
3333  * @param tmp   scratch register
3334  */
3335 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
3336         Register table0, Register table1, Register table2, Register table3,
3337         Register tmp, Register tmp2, Register tmp3) {
3338   Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
3339   unsigned long offset;
3340 
3341   if (UseCRC32) {
3342       kernel_crc32_using_crc32(crc, buf, len, table0, table1, table2, table3);
3343       return;
3344   }
3345 
3346     mvnw(crc, crc);
3347 
3348     adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset);
3349     if (offset) add(table0, table0, offset);
3350     add(table1, table0, 1*256*sizeof(juint));
3351     add(table2, table0, 2*256*sizeof(juint));
3352     add(table3, table0, 3*256*sizeof(juint));
3353 
3354   if (UseNeon) {
3355       cmp(len, (u1)64);
3356       br(Assembler::LT, L_by16);
3357       eor(v16, T16B, v16, v16);
3358 
3359     Label L_fold;


3621   BIND(L_exit);
3622 }
3623 
3624 /**
3625  * @param crc   register containing existing CRC (32-bit)
3626  * @param buf   register pointing to input byte buffer (byte*)
3627  * @param len   register containing number of bytes
3628  * @param table register that will contain address of CRC table
3629  * @param tmp   scratch register
3630  */
3631 void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len,
3632         Register table0, Register table1, Register table2, Register table3,
3633         Register tmp, Register tmp2, Register tmp3) {
3634   kernel_crc32c_using_crc32c(crc, buf, len, table0, table1, table2, table3);
3635 }
3636 
3637 
3638 SkipIfEqual::SkipIfEqual(
3639     MacroAssembler* masm, const bool* flag_addr, bool value) {
3640   _masm = masm;
3641   unsigned long offset;
3642   _masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset);
3643   _masm->ldrb(rscratch1, Address(rscratch1, offset));
3644   _masm->cbzw(rscratch1, _label);
3645 }
3646 
3647 SkipIfEqual::~SkipIfEqual() {
3648   _masm->bind(_label);
3649 }
3650 
3651 void MacroAssembler::addptr(const Address &dst, int32_t src) {
3652   Address adr;
3653   switch(dst.getMode()) {
3654   case Address::base_plus_offset:
3655     // This is the expected mode, although we allow all the other
3656     // forms below.
3657     adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord);
3658     break;
3659   default:
3660     lea(rscratch2, dst);
3661     adr = Address(rscratch2);
3662     break;
3663   }
3664   ldr(rscratch1, adr);
3665   add(rscratch1, rscratch1, src);
3666   str(rscratch1, adr);
3667 }
3668 
3669 void MacroAssembler::cmpptr(Register src1, Address src2) {
3670   unsigned long offset;
3671   adrp(rscratch1, src2, offset);
3672   ldr(rscratch1, Address(rscratch1, offset));
3673   cmp(src1, rscratch1);
3674 }
3675 
3676 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
3677   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3678   bs->obj_equals(this, obj1, obj2);
3679 }
3680 
3681 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
3682   load_method_holder(rresult, rmethod);
3683   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
3684 }
3685 
3686 void MacroAssembler::load_method_holder(Register holder, Register method) {
3687   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
3688   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
3689   ldr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
3690 }


4312 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
4313   ldr(dest, Address(rthread, Thread::polling_page_offset()));
4314 }
4315 
4316 // Move the address of the polling page into r, then read the polling
4317 // page.
4318 address MacroAssembler::fetch_and_read_polling_page(Register r, relocInfo::relocType rtype) {
4319   get_polling_page(r, rtype);
4320   return read_polling_page(r, rtype);
4321 }
4322 
4323 // Read the polling page.  The address of the polling page must
4324 // already be in r.
4325 address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
4326   InstructionMark im(this);
4327   code_section()->relocate(inst_mark(), rtype);
4328   ldrw(zr, Address(r, 0));
4329   return inst_mark();
4330 }
4331 
4332 void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
4333   relocInfo::relocType rtype = dest.rspec().reloc()->type();
4334   unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12;
4335   unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12;
4336   unsigned long dest_page = (unsigned long)dest.target() >> 12;
4337   long offset_low = dest_page - low_page;
4338   long offset_high = dest_page - high_page;
4339 
4340   assert(is_valid_AArch64_address(dest.target()), "bad address");
4341   assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
4342 
4343   InstructionMark im(this);
4344   code_section()->relocate(inst_mark(), dest.rspec());
4345   // 8143067: Ensure that the adrp can reach the dest from anywhere within
4346   // the code cache so that if it is relocated we know it will still reach
4347   if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
4348     _adrp(reg1, dest.target());
4349   } else {
4350     unsigned long target = (unsigned long)dest.target();
4351     unsigned long adrp_target
4352       = (target & 0xffffffffUL) | ((unsigned long)pc() & 0xffff00000000UL);
4353 
4354     _adrp(reg1, (address)adrp_target);
4355     movk(reg1, target >> 32, 32);
4356   }
4357   byte_offset = (unsigned long)dest.target() & 0xfff;
4358 }
4359 
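// Editor's note: a sketch (not part of the patch) of the reachability test used by adrp()
// above before it falls back to the ADRP+MOVK form. The helper name is hypothetical.
#include <cstdint>

static bool adrp_reaches_from_whole_cache(uint64_t low_bound, uint64_t high_bound, uint64_t dest) {
  int64_t offset_low  = (int64_t)(dest >> 12) - (int64_t)(low_bound >> 12);
  int64_t offset_high = (int64_t)(dest >> 12) - (int64_t)((high_bound - 1) >> 12);
  // ADRP encodes a signed 21-bit page offset (about +/- 1M pages, i.e. +/- 4GB).
  return offset_high >= -(1 << 20) && offset_low < (1 << 20);
}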
4360 void MacroAssembler::load_byte_map_base(Register reg) {
4361   CardTable::CardValue* byte_map_base =
4362     ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
4363 
4364   if (is_valid_AArch64_address((address)byte_map_base)) {
4365     // Strictly speaking the byte_map_base isn't an address at all,
4366     // and it might even be negative.
4367     unsigned long offset;
4368     adrp(reg, ExternalAddress((address)byte_map_base), offset);
4369     // We expect offset to be zero with most collectors.
4370     if (offset != 0) {
4371       add(reg, reg, offset);
4372     }
4373   } else {
4374     mov(reg, (uint64_t)byte_map_base);
4375   }
4376 }
4377 
4378 void MacroAssembler::build_frame(int framesize) {
4379   assert(framesize > 0, "framesize must be > 0");
4380   if (framesize < ((1 << 9) + 2 * wordSize)) {
4381     sub(sp, sp, framesize);
4382     stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4383     if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
4384   } else {
4385     stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
4386     if (PreserveFramePointer) mov(rfp, sp);
4387     if (framesize < ((1 << 12) + 2 * wordSize))


4392     }
4393   }
4394 }
4395 
4396 void MacroAssembler::remove_frame(int framesize) {
4397   assert(framesize > 0, "framesize must be > 0");
4398   if (framesize < ((1 << 9) + 2 * wordSize)) {
4399     ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4400     add(sp, sp, framesize);
4401   } else {
4402     if (framesize < ((1 << 12) + 2 * wordSize))
4403       add(sp, sp, framesize - 2 * wordSize);
4404     else {
4405       mov(rscratch1, framesize - 2 * wordSize);
4406       add(sp, sp, rscratch1);
4407     }
4408     ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
4409   }
4410 }
4411 
4412 
 4413 // This method checks whether the provided byte array contains a byte with the highest bit set.
4414 void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
 4415     // The simple and most common case of a small aligned array that is not at the
 4416     // end of a memory page is handled here. All other cases are in the stub.
4417     Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
4418     const uint64_t UPPER_BIT_MASK=0x8080808080808080;
4419     assert_different_registers(ary1, len, result);
4420 
4421     cmpw(len, 0);
4422     br(LE, SET_RESULT);
4423     cmpw(len, 4 * wordSize);
4424     br(GE, STUB_LONG); // size > 32 then go to stub
4425 
4426     int shift = 64 - exact_log2(os::vm_page_size());
4427     lsl(rscratch1, ary1, shift);
4428     mov(rscratch2, (size_t)(4 * wordSize) << shift);
4429     adds(rscratch2, rscratch1, rscratch2);  // At end of page?
4430     br(CS, STUB); // at the end of page then go to stub
4431     subs(len, len, wordSize);
4432     br(LT, END);


4790   for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
4791     Label l;
4792     tbz(cnt, exact_log2(i), l);
4793     for (int j = 0; j < i; j += 2) {
4794       stp(zr, zr, post(ptr, 16));
4795     }
4796     bind(l);
4797   }
4798   {
4799     Label l;
4800     tbz(cnt, 0, l);
4801     str(zr, Address(ptr));
4802     bind(l);
4803   }
4804   BLOCK_COMMENT("} zero_words");
4805 }
4806 
4807 // base:         Address of a buffer to be zeroed, 8 bytes aligned.
4808 // cnt:          Immediate count in HeapWords.
4809 #define SmallArraySize (18 * BytesPerLong)
4810 void MacroAssembler::zero_words(Register base, u_int64_t cnt)
4811 {
4812   BLOCK_COMMENT("zero_words {");
4813   int i = cnt & 1;  // store any odd word to start
4814   if (i) str(zr, Address(base));
4815 
4816   if (cnt <= SmallArraySize / BytesPerLong) {
4817     for (; i < (int)cnt; i += 2)
4818       stp(zr, zr, Address(base, i * wordSize));
4819   } else {
4820     const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
4821     int remainder = cnt % (2 * unroll);
4822     for (; i < remainder; i += 2)
4823       stp(zr, zr, Address(base, i * wordSize));
4824 
4825     Label loop;
4826     Register cnt_reg = rscratch1;
4827     Register loop_base = rscratch2;
4828     cnt = cnt - remainder;
4829     mov(cnt_reg, cnt);
4830     // adjust base and prebias by -2 * wordSize so we can pre-increment




  53 #endif
  54 #ifdef COMPILER2
  55 #include "oops/oop.hpp"
  56 #include "opto/compile.hpp"
  57 #include "opto/node.hpp"
  58 #include "opto/output.hpp"
  59 #endif
  60 
  61 #ifdef PRODUCT
  62 #define BLOCK_COMMENT(str) /* nothing */
  63 #else
  64 #define BLOCK_COMMENT(str) block_comment(str)
  65 #endif
  66 #define STOP(str) stop(str);
  67 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  68 
  69 // Patch any kind of instruction; there may be several instructions.
  70 // Return the total length (in bytes) of the instructions.
  71 int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
  72   int instructions = 1;
  73   assert((uint64_t)target < ((uint64_t)1 << 48), "48-bit overflow in address constant");
  74   int64_t offset = (target - branch) >> 2;
  75   unsigned insn = *(unsigned*)branch;
  76   if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
  77     // Load register (literal)
  78     Instruction_aarch64::spatch(branch, 23, 5, offset);
  79   } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
  80     // Unconditional branch (immediate)
  81     Instruction_aarch64::spatch(branch, 25, 0, offset);
  82   } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
  83     // Conditional branch (immediate)
  84     Instruction_aarch64::spatch(branch, 23, 5, offset);
  85   } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
  86     // Compare & branch (immediate)
  87     Instruction_aarch64::spatch(branch, 23, 5, offset);
  88   } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
  89     // Test & branch (immediate)
  90     Instruction_aarch64::spatch(branch, 18, 5, offset);
  91   } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
  92     // PC-rel. addressing
  93     offset = target-branch;
  94     int shift = Instruction_aarch64::extract(insn, 31, 31);
  95     if (shift) {
  96       uint64_t dest = (uint64_t)target;
  97       uint64_t pc_page = (uint64_t)branch >> 12;
  98       uint64_t adr_page = (uint64_t)target >> 12;
  99       unsigned offset_lo = dest & 0xfff;
 100       offset = adr_page - pc_page;
 101 
 102       // We handle 4 types of PC relative addressing
 103       //   1 - adrp    Rx, target_page
 104       //       ldr/str Ry, [Rx, #offset_in_page]
 105       //   2 - adrp    Rx, target_page
 106       //       add     Ry, Rx, #offset_in_page
 107       //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
 108       //       movk    Rx, #imm16<<32
 109       //   4 - adrp    Rx, target_page (page aligned reloc, offset == 0)
 110       // In the first 3 cases we must check that Rx is the same in the adrp and the
 111       // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end
 112       // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened
 113       // to be followed by a random unrelated ldr/str, add or movk instruction.
 114       //
 115       unsigned insn2 = ((unsigned*)branch)[1];
 116       if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
 117                 Instruction_aarch64::extract(insn, 4, 0) ==
 118                         Instruction_aarch64::extract(insn2, 9, 5)) {
 119         // Load/store register (unsigned immediate)
 120         unsigned size = Instruction_aarch64::extract(insn2, 31, 30);
 121         Instruction_aarch64::patch(branch + sizeof (unsigned),
 122                                     21, 10, offset_lo >> size);
 123         guarantee(((dest >> size) << size) == dest, "misaligned target");
 124         instructions = 2;
 125       } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
 126                 Instruction_aarch64::extract(insn, 4, 0) ==
 127                         Instruction_aarch64::extract(insn2, 4, 0)) {
 128         // add (immediate)
 129         Instruction_aarch64::patch(branch + sizeof (unsigned),
 130                                    21, 10, offset_lo);
 131         instructions = 2;
 132       } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
 133                    Instruction_aarch64::extract(insn, 4, 0) ==
 134                      Instruction_aarch64::extract(insn2, 4, 0)) {
 135         // movk #imm16<<32
 136         Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
 137         int64_t dest = ((int64_t)target & 0xffffffffL) | ((int64_t)branch & 0xffff00000000L);
 138         int64_t pc_page = (int64_t)branch >> 12;
 139         int64_t adr_page = (int64_t)dest >> 12;
 140         offset = adr_page - pc_page;
 141         instructions = 2;
 142       }
 143     }
 144     int offset_lo = offset & 3;
 145     offset >>= 2;
 146     Instruction_aarch64::spatch(branch, 23, 5, offset);
 147     Instruction_aarch64::patch(branch, 30, 29, offset_lo);
 148   } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
 149     uint64_t dest = (uint64_t)target;
 150     // Move wide constant
 151     assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
 152     assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
 153     Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
 154     Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
 155     Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
 156     assert(target_addr_for_insn(branch) == target, "should be");
 157     instructions = 3;
 158   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
 159              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
 160     // nothing to do
 161     assert(target == 0, "did not expect to relocate target for polling page load");
 162   } else {
 163     ShouldNotReachHere();
 164   }
 165   return instructions * NativeInstruction::instruction_size;
 166 }
 167 
 168 int MacroAssembler::patch_oop(address insn_addr, address o) {
 169   int instructions;


 188     Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
 189     instructions = 3;
 190   }
 191   return instructions * NativeInstruction::instruction_size;
 192 }
 193 
 194 int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
 195   // Metadata pointers are either narrow (32 bits) or wide (48 bits).
 196   // We encode narrow ones by setting the upper 16 bits in the first
 197   // instruction.
 198   NativeInstruction *insn = nativeInstruction_at(insn_addr);
 199   assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
 200          nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
 201 
 202   Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
 203   Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
 204   return 2 * NativeInstruction::instruction_size;
 205 }
 206 
 207 address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
 208   int64_t offset = 0;
 209   if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
 210     // Load register (literal)
 211     offset = Instruction_aarch64::sextract(insn, 23, 5);
 212     return address(((uint64_t)insn_addr + (offset << 2)));
 213   } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
 214     // Unconditional branch (immediate)
 215     offset = Instruction_aarch64::sextract(insn, 25, 0);
 216   } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
 217     // Conditional branch (immediate)
 218     offset = Instruction_aarch64::sextract(insn, 23, 5);
 219   } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
 220     // Compare & branch (immediate)
 221     offset = Instruction_aarch64::sextract(insn, 23, 5);
 222    } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
 223     // Test & branch (immediate)
 224     offset = Instruction_aarch64::sextract(insn, 18, 5);
 225   } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
 226     // PC-rel. addressing
 227     offset = Instruction_aarch64::extract(insn, 30, 29);
 228     offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2;


 255         return address(target_page + (byte_offset << size));
 256       } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
 257                 Instruction_aarch64::extract(insn, 4, 0) ==
 258                         Instruction_aarch64::extract(insn2, 4, 0)) {
 259         // add (immediate)
 260         unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
 261         return address(target_page + byte_offset);
 262       } else {
 263         if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110  &&
 264                Instruction_aarch64::extract(insn, 4, 0) ==
 265                  Instruction_aarch64::extract(insn2, 4, 0)) {
 266           target_page = (target_page & 0xffffffff) |
 267                          ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32);
 268         }
 269         return (address)target_page;
 270       }
 271     } else {
 272       ShouldNotReachHere();
 273     }
 274   } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
 275     uint32_t *insns = (uint32_t *)insn_addr;
 276     // Move wide constant: movz, movk, movk.  See movptr().
 277     assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
 278     assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
 279     return address(uint64_t(Instruction_aarch64::extract(insns[0], 20, 5))
 280                    + (uint64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
 281                    + (uint64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
 282   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
 283              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
 284     return 0;
 285   } else {
 286     ShouldNotReachHere();
 287   }
 288   return address(((uint64_t)insn_addr + (offset << 2)));
 289 }
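
For the plain branch forms above the immediate is a signed offset in instruction words, so the decoded target is insn_addr + (offset << 2). A hedged sketch of the decode for an unconditional branch (B), assuming the usual two's-complement shift behaviour:

  #include <cstdint>

  // Decode the target of a B instruction: bits 25..0 hold a signed
  // offset in 4-byte units, relative to the branch itself.
  uint64_t b_target(uint64_t insn_addr, uint32_t insn) {
    int64_t imm26 = (int32_t)(insn << 6) >> 6;   // sign-extend bits 25..0
    return insn_addr + (uint64_t)(imm26 * 4);    // scale words to bytes
  }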
 290 
 291 void MacroAssembler::safepoint_poll(Label& slow_path) {
 292   ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));
 293   tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
 294 }
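
In C terms the poll above is just a bit test on the per-thread polling word loaded from Thread::polling_page_offset(); a rough sketch of the semantics (not emitted code):

  #include <cstdint>

  // Semantics of the generated tbnz: take the slow path when the
  // poll bit is set in the thread-local polling word.
  bool safepoint_poll_hit(uintptr_t poll_word, uintptr_t poll_bit) {
    return (poll_word & poll_bit) != 0;
  }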
 295 
 296 // Just like safepoint_poll, but use an acquiring load for thread-
 297 // local polling.
 298 //
 299 // We need an acquire here to ensure that any subsequent load of the
 300 // global SafepointSynchronize::_state flag is ordered after this load
 301 // of the local Thread::_polling page.  We don't want this poll to


 372 }
 373 
 374 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
 375                                          Register last_java_fp,
 376                                          Label &L,
 377                                          Register scratch) {
 378   if (L.is_bound()) {
 379     set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch);
 380   } else {
 381     InstructionMark im(this);
 382     L.add_patch_at(code(), locator());
 383     set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, scratch);
 384   }
 385 }
 386 
 387 void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
 388   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
 389   assert(CodeCache::find_blob(entry.target()) != NULL,
 390          "destination of far call not found in code cache");
 391   if (far_branches()) {
 392     uint64_t offset;
 393     // We can use ADRP here because we know that the total size of
 394     // the code cache cannot exceed 2Gb.
 395     adrp(tmp, entry, offset);
 396     add(tmp, tmp, offset);
 397     if (cbuf) cbuf->set_insts_mark();
 398     blr(tmp);
 399   } else {
 400     if (cbuf) cbuf->set_insts_mark();
 401     bl(entry);
 402   }
 403 }
 404 
 405 void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
 406   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
 407   assert(CodeCache::find_blob(entry.target()) != NULL,
 408          "destination of far jump not found in code cache");
 409   if (far_branches()) {
 410     uint64_t offset;
 411     // We can use ADRP here because we know that the total size of
 412     // the code cache cannot exceed 2Gb.
 413     adrp(tmp, entry, offset);
 414     add(tmp, tmp, offset);
 415     if (cbuf) cbuf->set_insts_mark();
 416     br(tmp);
 417   } else {
 418     if (cbuf) cbuf->set_insts_mark();
 419     b(entry);
 420   }
 421 }
 422 
 423 void MacroAssembler::reserved_stack_check() {
 424     // testing if reserved zone needs to be enabled
 425     Label no_reserved_zone_enabling;
 426 
 427     ldr(rscratch1, Address(rthread, JavaThread::reserved_stack_activation_offset()));
 428     cmp(sp, rscratch1);
 429     br(Assembler::LO, no_reserved_zone_enabling);
 430 


 807   isb();
 808   mov_metadata(rmethod, (Metadata*)NULL);
 809 
 810   // Jump to the entry point of the i2c stub.
 811   movptr(rscratch1, 0);
 812   br(rscratch1);
 813 }
 814 
 815 void MacroAssembler::c2bool(Register x) {
 816   // implements x == 0 ? 0 : 1
 817   // note: must only look at least-significant byte of x
 818   //       since C-style booleans are stored in one byte
 819   //       only! (was bug)
 820   tst(x, 0xff);
 821   cset(x, Assembler::NE);
 822 }
 823 
 824 address MacroAssembler::ic_call(address entry, jint method_index) {
 825   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
 826   // address const_ptr = long_constant((jlong)Universe::non_oop_word());
 827   // uint64_t offset;
 828   // ldr_constant(rscratch2, const_ptr);
 829   movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
 830   return trampoline_call(Address(entry, rh));
 831 }
 832 
 833 // Implementation of call_VM versions
 834 
 835 void MacroAssembler::call_VM(Register oop_result,
 836                              address entry_point,
 837                              bool check_exceptions) {
 838   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 839 }
 840 
 841 void MacroAssembler::call_VM(Register oop_result,
 842                              address entry_point,
 843                              Register arg_1,
 844                              bool check_exceptions) {
 845   pass_arg1(this, arg_1);
 846   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 847 }


1474   MacroAssembler::call_VM_leaf_base(entry_point, 4);
1475 }
1476 
1477 void MacroAssembler::null_check(Register reg, int offset) {
1478   if (needs_explicit_null_check(offset)) {
1479     // provoke OS NULL exception if reg = NULL by
1480     // accessing M[reg] w/o changing any registers
1481     // NOTE: this is plenty to provoke a segv
1482     ldr(zr, Address(reg));
1483   } else {
1484     // nothing to do, (later) access of M[reg + offset]
1485     // will provoke OS NULL exception if reg = NULL
1486   }
1487 }
1488 
1489 // MacroAssembler protected routines needed to implement
1490 // public methods
1491 
1492 void MacroAssembler::mov(Register r, Address dest) {
1493   code_section()->relocate(pc(), dest.rspec());
1494   uint64_t imm64 = (uint64_t)dest.target();
1495   movptr(r, imm64);
1496 }
1497 
1498 // Move a constant pointer into r.  In AArch64 mode the virtual
1499 // address space is 48 bits in size, so we only need three
1500 // instructions to create a patchable instruction sequence that can
1501 // reach anywhere.
1502 void MacroAssembler::movptr(Register r, uintptr_t imm64) {
1503 #ifndef PRODUCT
1504   {
1505     char buffer[64];
1506     snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
1507     block_comment(buffer);
1508   }
1509 #endif
1510   assert(imm64 < (1ull << 48), "48-bit overflow in address constant");
1511   movz(r, imm64 & 0xffff);
1512   imm64 >>= 16;
1513   movk(r, imm64 & 0xffff, 16);
1514   imm64 >>= 16;
1515   movk(r, imm64 & 0xffff, 32);
1516 }
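
As the comment above says, the constant is emitted one 16-bit halfword at a time (movz, then movk at LSL #16 and LSL #32). A hedged sketch of the decomposition only:

  #include <cassert>
  #include <cstdint>

  // Break a 48-bit constant into the three halfwords used by
  // movz/movk/movk in movptr (illustrative only).
  void split48(uint64_t imm64, uint16_t halfwords[3]) {
    assert(imm64 < (UINT64_C(1) << 48));
    halfwords[0] = (uint16_t)(imm64 & 0xffff);          // movz r, hw0
    halfwords[1] = (uint16_t)((imm64 >> 16) & 0xffff);  // movk r, hw1, lsl #16
    halfwords[2] = (uint16_t)((imm64 >> 32) & 0xffff);  // movk r, hw2, lsl #32
  }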
1517 
1518 // Macro to mov replicated immediate to vector register.
1519 //  Vd will get the following values for different arrangements in T
1520 //   imm32 == hex 000000gh  T8B:  Vd = ghghghghghghghgh
1521 //   imm32 == hex 000000gh  T16B: Vd = ghghghghghghghghghghghghghghghgh
1522 //   imm32 == hex 0000efgh  T4H:  Vd = efghefghefghefgh
1523 //   imm32 == hex 0000efgh  T8H:  Vd = efghefghefghefghefghefghefghefgh
1524 //   imm32 == hex abcdefgh  T2S:  Vd = abcdefghabcdefgh
1525 //   imm32 == hex abcdefgh  T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
1526 //   T1D/T2D: invalid
1527 void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32) {
1528   assert(T != T1D && T != T2D, "invalid arrangement");
1529   if (T == T8B || T == T16B) {
1530     assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
1531     movi(Vd, T, imm32 & 0xff, 0);
1532     return;
1533   }
1534   uint32_t nimm32 = ~imm32;
1535   if (T == T4H || T == T8H) {
1536     assert((imm32  & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
1537     imm32 &= 0xffff;
1538     nimm32 &= 0xffff;
1539   }
1540   uint32_t x = imm32;
1541   int movi_cnt = 0;
1542   int movn_cnt = 0;
1543   while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
1544   x = nimm32;
1545   while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
1546   if (movn_cnt < movi_cnt) imm32 = nimm32;
1547   unsigned lsl = 0;
1548   while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1549   if (movn_cnt < movi_cnt)
1550     mvni(Vd, T, imm32 & 0xff, lsl);
1551   else
1552     movi(Vd, T, imm32 & 0xff, lsl);
1553   imm32 >>= 8; lsl += 8;
1554   while (imm32) {
1555     while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1556     if (movn_cnt < movi_cnt)
1557       bici(Vd, T, imm32 & 0xff, lsl);
1558     else
1559       orri(Vd, T, imm32 & 0xff, lsl);
1560     lsl += 8; imm32 >>= 8;
1561   }
1562 }
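
The routine above counts the non-zero bytes of imm32 and of its bitwise complement, then emits whichever of the movi/orri or mvni/bici sequences needs fewer instructions. A small sketch of just that counting step (illustrative, not the assembler itself):

  #include <cstdint>

  // Count the non-zero byte lanes of a 32-bit value; the assembler
  // compares this for imm32 and ~imm32 to pick movi/orri vs. mvni/bici.
  int nonzero_bytes(uint32_t x) {
    int n = 0;
    while (x) { if (x & 0xff) n++; x >>= 8; }
    return n;
  }
  // e.g. nonzero_bytes(0x00ff00ff) == 2 and nonzero_bytes(~0x00ff00ffu) == 2,
  // so either sequence costs the same for that value.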
1563 
1564 void MacroAssembler::mov_immediate64(Register dst, uint64_t imm64)
1565 {
1566 #ifndef PRODUCT
1567   {
1568     char buffer[64];
1569     snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
1570     block_comment(buffer);
1571   }
1572 #endif
1573   if (operand_valid_for_logical_immediate(false, imm64)) {
1574     orr(dst, zr, imm64);
1575   } else {
1576     // we can use a combination of MOVZ or MOVN with
1577     // MOVK to build up the constant
1578     uint64_t imm_h[4];
1579     int zero_count = 0;
1580     int neg_count = 0;
1581     int i;
1582     for (i = 0; i < 4; i++) {
1583       imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
1584       if (imm_h[i] == 0) {
1585         zero_count++;
1586       } else if (imm_h[i] == 0xffffL) {
1587         neg_count++;
1588       }
1589     }
1590     if (zero_count == 4) {
1591       // one MOVZ will do
1592       movz(dst, 0);
1593     } else if (neg_count == 4) {
1594       // one MOVN will do
1595       movn(dst, 0);
1596     } else if (zero_count == 3) {
1597       for (i = 0; i < 4; i++) {
1598         if (imm_h[i] != 0L) {
1599           movz(dst, (uint32_t)imm_h[i], (i << 4));
1600           break;
1601         }
1602       }
1603     } else if (neg_count == 3) {
1604       // one MOVN will do
1605       for (int i = 0; i < 4; i++) {
1606         if (imm_h[i] != 0xffffL) {
1607           movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1608           break;
1609         }
1610       }
1611     } else if (zero_count == 2) {
1612       // one MOVZ and one MOVK will do
1613       for (i = 0; i < 3; i++) {
1614         if (imm_h[i] != 0L) {
1615           movz(dst, (uint32_t)imm_h[i], (i << 4));
1616           i++;
1617           break;
1618         }
1619       }
1620       for (;i < 4; i++) {
1621         if (imm_h[i] != 0L) {
1622           movk(dst, (uint32_t)imm_h[i], (i << 4));
1623         }
1624       }
1625     } else if (neg_count == 2) {
1626       // one MOVN and one MOVK will do
1627       for (i = 0; i < 4; i++) {
1628         if (imm_h[i] != 0xffffL) {
1629           movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1630           i++;
1631           break;
1632         }
1633       }
1634       for (;i < 4; i++) {
1635         if (imm_h[i] != 0xffffL) {
1636           movk(dst, (uint32_t)imm_h[i], (i << 4));
1637         }
1638       }
1639     } else if (zero_count == 1) {
1640       // one MOVZ and two MOVKs will do
1641       for (i = 0; i < 4; i++) {
1642         if (imm_h[i] != 0L) {
1643           movz(dst, (uint32_t)imm_h[i], (i << 4));
1644           i++;
1645           break;
1646         }
1647       }
1648       for (;i < 4; i++) {
1649         if (imm_h[i] != 0x0L) {
1650           movk(dst, (uint32_t)imm_h[i], (i << 4));
1651         }
1652       }
1653     } else if (neg_count == 1) {
1654       // one MOVN and two MOVKs will do
1655       for (i = 0; i < 4; i++) {
1656         if (imm_h[i] != 0xffffL) {
1657           movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1658           i++;
1659           break;
1660         }
1661       }
1662       for (;i < 4; i++) {
1663         if (imm_h[i] != 0xffffL) {
1664           movk(dst, (uint32_t)imm_h[i], (i << 4));
1665         }
1666       }
1667     } else {
1668       // use a MOVZ and 3 MOVKs (makes it easier to debug)
1669       movz(dst, (uint32_t)imm_h[0], 0);
1670       for (i = 1; i < 4; i++) {
1671         movk(dst, (uint32_t)imm_h[i], (i << 4));
1672       }
1673     }
1674   }
1675 }
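
The selection above hinges on classifying the four 16-bit halfwords of imm64 as zero, all-ones, or mixed, and then choosing the shortest MOVZ/MOVN/MOVK sequence. A hedged sketch of the classification step only:

  #include <cstdint>

  // Classify the halfwords of a 64-bit immediate the way
  // mov_immediate64 does before picking a MOVZ/MOVN/MOVK sequence.
  void classify_halfwords(uint64_t imm64, int &zero_count, int &neg_count) {
    zero_count = 0;
    neg_count = 0;
    for (int i = 0; i < 4; i++) {
      uint64_t h = (imm64 >> (i * 16)) & 0xffff;
      if (h == 0)           zero_count++;
      else if (h == 0xffff) neg_count++;
    }
  }
  // e.g. 0xffffffff00001234ULL gives zero_count == 1 and neg_count == 2,
  // which the code handles with one MOVN plus one MOVK.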
1676 
1677 void MacroAssembler::mov_immediate32(Register dst, uint32_t imm32)
1678 {
1679 #ifndef PRODUCT
1680   {
1681     char buffer[64];
1682     snprintf(buffer, sizeof(buffer), "0x%" PRIX32, imm32);
1683     block_comment(buffer);
1684   }
1685 #endif
1686   if (operand_valid_for_logical_immediate(true, imm32)) {
1687     orrw(dst, zr, imm32);
1688   } else {
1689     // we can use a single MOVZ or MOVN, or a MOVZ/MOVN followed by a
1690     // MOVK, to build up the constant
1691     uint32_t imm_h[2];
1692     imm_h[0] = imm32 & 0xffff;
1693     imm_h[1] = ((imm32 >> 16) & 0xffff);
1694     if (imm_h[0] == 0) {
1695       movzw(dst, imm_h[1], 16);
1696     } else if (imm_h[0] == 0xffff) {
1697       movnw(dst, imm_h[1] ^ 0xffff, 16);
1698     } else if (imm_h[1] == 0) {
1699       movzw(dst, imm_h[0], 0);
1700     } else if (imm_h[1] == 0xffff) {
1701       movnw(dst, imm_h[0] ^ 0xffff, 0);
1702     } else {
1703       // use a MOVZ and MOVK (makes it easier to debug)
1704       movzw(dst, imm_h[0], 0);
1705       movkw(dst, imm_h[1], 16);
1706     }
1707   }
1708 }
1709 
1710 // Form an address from base + offset in Rd.  Rd may or may
1711 // not actually be used: you must use the Address that is returned.
1712 // It is up to you to ensure that the shift provided matches the size
1713 // of your data.
1714 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset, int shift) {
1715   if (Address::offset_ok_for_immed(byte_offset, shift))
1716     // It fits; no need for any heroics
1717     return Address(base, byte_offset);
1718 
1719   // Don't do anything clever with negative or misaligned offsets
1720   unsigned mask = (1 << shift) - 1;
1721   if (byte_offset < 0 || byte_offset & mask) {
1722     mov(Rd, byte_offset);
1723     add(Rd, base, Rd);
1724     return Address(Rd);
1725   }
1726 
1727   // See if we can do this with two 12-bit offsets
1728   {
1729     uint64_t word_offset = byte_offset >> shift;
1730     uint64_t masked_offset = word_offset & 0xfff000;
1731     if (Address::offset_ok_for_immed(word_offset - masked_offset, 0)
1732         && Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
1733       add(Rd, base, masked_offset << shift);
1734       word_offset -= masked_offset;
1735       return Address(Rd, word_offset << shift);
1736     }
1737   }
1738 
1739   // Do it the hard way
1740   mov(Rd, byte_offset);
1741   add(Rd, base, Rd);
1742   return Address(Rd);
1743 }
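
The "two 12-bit offsets" path above adds the upper, page-like chunk of the scaled offset to the base and leaves a residue small enough for the load/store immediate field. A worked sketch with made-up numbers:

  #include <cstdint>

  // Split a large, aligned byte offset the way form_address's
  // two-immediate path does (values below are purely illustrative).
  void split_offset(uint64_t byte_offset, int shift,
                    uint64_t &add_imm, uint64_t &ldst_imm) {
    uint64_t word_offset   = byte_offset >> shift;
    uint64_t masked_offset = word_offset & 0xfff000;      // handled by add(Rd, base, ...)
    add_imm  = masked_offset << shift;
    ldst_imm = (word_offset - masked_offset) << shift;    // remaining scaled offset
  }
  // e.g. byte_offset = 0x23458, shift = 3: add_imm = 0x20000, ldst_imm = 0x3458.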
1744 
1745 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
1746   if (UseLSE) {
1747     mov(tmp, 1);
1748     ldadd(Assembler::word, tmp, zr, counter_addr);
1749     return;
1750   }


1951 
1952 void MacroAssembler::decrementw(Register reg, int value)
1953 {
1954   if (value < 0)  { incrementw(reg, -value);      return; }
1955   if (value == 0) {                               return; }
1956   if (value < (1 << 12)) { subw(reg, reg, value); return; }
1957   /* else */ {
1958     guarantee(reg != rscratch2, "invalid dst for register decrement");
1959     movw(rscratch2, (unsigned)value);
1960     subw(reg, reg, rscratch2);
1961   }
1962 }
1963 
1964 void MacroAssembler::decrement(Register reg, int value)
1965 {
1966   if (value < 0)  { increment(reg, -value);      return; }
1967   if (value == 0) {                              return; }
1968   if (value < (1 << 12)) { sub(reg, reg, value); return; }
1969   /* else */ {
1970     assert(reg != rscratch2, "invalid dst for register decrement");
1971     mov(rscratch2, (uint64_t) value);
1972     sub(reg, reg, rscratch2);
1973   }
1974 }
1975 
1976 void MacroAssembler::decrementw(Address dst, int value)
1977 {
1978   assert(!dst.uses(rscratch1), "invalid dst for address decrement");
1979   if (dst.getMode() == Address::literal) {
1980     assert(abs(value) < (1 << 12), "invalid value and address mode combination");
1981     lea(rscratch2, dst);
1982     dst = Address(rscratch2);
1983   }
1984   ldrw(rscratch1, dst);
1985   decrementw(rscratch1, value);
1986   strw(rscratch1, dst);
1987 }
1988 
1989 void MacroAssembler::decrement(Address dst, int value)
1990 {
1991   assert(!dst.uses(rscratch1), "invalid address for decrement");


2583       tty->print_cr("r19 = 0x%016lx", regs[19]);
2584       tty->print_cr("r20 = 0x%016lx", regs[20]);
2585       tty->print_cr("r21 = 0x%016lx", regs[21]);
2586       tty->print_cr("r22 = 0x%016lx", regs[22]);
2587       tty->print_cr("r23 = 0x%016lx", regs[23]);
2588       tty->print_cr("r24 = 0x%016lx", regs[24]);
2589       tty->print_cr("r25 = 0x%016lx", regs[25]);
2590       tty->print_cr("r26 = 0x%016lx", regs[26]);
2591       tty->print_cr("r27 = 0x%016lx", regs[27]);
2592       tty->print_cr("r28 = 0x%016lx", regs[28]);
2593       tty->print_cr("r30 = 0x%016lx", regs[30]);
2594       tty->print_cr("r31 = 0x%016lx", regs[31]);
2595       BREAKPOINT;
2596     }
2597   }
2598   fatal("DEBUG MESSAGE: %s", msg);
2599 }
2600 
2601 void MacroAssembler::push_call_clobbered_registers() {
2602   int step = 4 * wordSize;
2603   push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) WIN64_ONLY(- r18), sp);
2604   sub(sp, sp, step);
2605   mov(rscratch1, -step);
2606   // Push v0-v7, v16-v31.
2607   for (int i = 31; i>= 4; i -= 4) {
2608     if (i <= v7->encoding() || i >= v16->encoding())
2609       st1(as_FloatRegister(i-3), as_FloatRegister(i-2), as_FloatRegister(i-1),
2610           as_FloatRegister(i), T1D, Address(post(sp, rscratch1)));
2611   }
2612   st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2),
2613       as_FloatRegister(3), T1D, Address(sp));
2614 }
2615 
2616 void MacroAssembler::pop_call_clobbered_registers() {
2617   for (int i = 0; i < 32; i += 4) {
2618     if (i <= v7->encoding() || i >= v16->encoding())
2619       ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2620           as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
2621   }
2622   pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) WIN64_ONLY(- r18), sp);

2623 }
2624 
2625 void MacroAssembler::push_CPU_state(bool save_vectors) {
2626   int step = (save_vectors ? 8 : 4) * wordSize;
2627   push(0x3fffffff, sp);         // integer registers except lr & sp
2628   mov(rscratch1, -step);
2629   sub(sp, sp, step);
2630   for (int i = 28; i >= 4; i -= 4) {
2631     st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2632         as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
2633   }
2634   st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
2635 }
2636 
2637 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
2638   int step = (restore_vectors ? 8 : 4) * wordSize;
2639   for (int i = 0; i <= 28; i += 4)
2640     ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2641         as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
2642   pop(0x3fffffff, sp);         // integer registers except lr & sp


2674   Register base = sp;
2675   if ((offset & (size-1)) && offset >= (1<<8)) {
2676     add(tmp, base, offset & ((1<<12)-1));
2677     base = tmp;
2678     offset &= -1u<<12;
2679   }
2680 
2681   if (offset >= (1<<12) * size) {
2682     add(tmp, base, offset & (((1<<12)-1)<<12));
2683     base = tmp;
2684     offset &= ~(((1<<12)-1)<<12);
2685   }
2686 
2687   return Address(base, offset);
2688 }
2689 
2690 // Checks whether offset is aligned.
2691 // Returns true if it is, else false.
2692 bool MacroAssembler::merge_alignment_check(Register base,
2693                                            size_t size,
2694                                            int64_t cur_offset,
2695                                            int64_t prev_offset) const {
2696   if (AvoidUnalignedAccesses) {
2697     if (base == sp) {
2698       // Checks whether the low offset is aligned for a register pair.
2699       int64_t pair_mask = size * 2 - 1;
2700       int64_t offset = prev_offset > cur_offset ? cur_offset : prev_offset;
2701       return (offset & pair_mask) == 0;
2702     } else { // If base is not sp, we can't guarantee the access is aligned.
2703       return false;
2704     }
2705   } else {
2706     int64_t mask = size - 1;
2707     // Load/store pair instruction only supports element size aligned offset.
2708     return (cur_offset & mask) == 0 && (prev_offset & mask) == 0;
2709   }
2710 }
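
Stated differently: with AvoidUnalignedAccesses the pair is only formed when the base is sp and the lower of the two offsets is aligned to 2 * size; otherwise each offset merely needs element-size alignment. A simplified standalone version of the predicate (base_is_sp stands in for the base == sp check):

  #include <algorithm>
  #include <cstdint>

  // Simplified form of the alignment test used before merging two
  // loads/stores into one ldp/stp.
  bool pair_aligned(bool avoid_unaligned, bool base_is_sp, int64_t size,
                    int64_t cur_offset, int64_t prev_offset) {
    if (avoid_unaligned) {
      if (!base_is_sp) return false;                   // alignment unknown otherwise
      int64_t low = std::min(cur_offset, prev_offset);
      return (low & (size * 2 - 1)) == 0;              // low offset pair-aligned
    }
    int64_t mask = size - 1;
    return (cur_offset & mask) == 0 && (prev_offset & mask) == 0;
  }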
2711 
2712 // Checks whether current and previous loads/stores can be merged.
2713 // Returns true if it can be merged, else false.
2714 bool MacroAssembler::ldst_can_merge(Register rt,
2715                                     const Address &adr,
2716                                     size_t cur_size_in_bytes,
2717                                     bool is_store) const {
2718   address prev = pc() - NativeInstruction::instruction_size;
2719   address last = code()->last_insn();
2720 
2721   if (last == NULL || !nativeInstruction_at(last)->is_Imm_LdSt()) {
2722     return false;
2723   }
2724 
2725   if (adr.getMode() != Address::base_plus_offset || prev != last) {
2726     return false;
2727   }
2728 
2729   NativeLdSt* prev_ldst = NativeLdSt_at(prev);
2730   size_t prev_size_in_bytes = prev_ldst->size_in_bytes();
2731 
2732   assert(prev_size_in_bytes == 4 || prev_size_in_bytes == 8, "only supports 64/32bit merging.");
2733   assert(cur_size_in_bytes == 4 || cur_size_in_bytes == 8, "only supports 64/32bit merging.");
2734 
2735   if (cur_size_in_bytes != prev_size_in_bytes || is_store != prev_ldst->is_store()) {
2736     return false;
2737   }
2738 
2739   int64_t max_offset = 63 * prev_size_in_bytes;
2740   int64_t min_offset = -64 * prev_size_in_bytes;
2741 
2742   assert(prev_ldst->is_not_pre_post_index(), "pre-index or post-index is not supported to be merged.");
2743 
2744   // Only same base can be merged.
2745   if (adr.base() != prev_ldst->base()) {
2746     return false;
2747   }
2748 
2749   int64_t cur_offset = adr.offset();
2750   int64_t prev_offset = prev_ldst->offset();
2751   size_t diff = abs(cur_offset - prev_offset);
2752   if (diff != prev_size_in_bytes) {
2753     return false;
2754   }
2755 
2756   // The following cases cannot be merged:
2757   // ldr x2, [x2, #8]
2758   // ldr x3, [x2, #16]
2759   // or:
2760   // ldr x2, [x3, #8]
2761   // ldr x2, [x3, #16]
2762   // If t1 and t2 are the same in "ldp t1, t2, [xn, #imm]", we'll get SIGILL.
2763   if (!is_store && (adr.base() == prev_ldst->target() || rt == prev_ldst->target())) {
2764     return false;
2765   }
2766 
2767   int64_t low_offset = prev_offset > cur_offset ? cur_offset : prev_offset;
2768   // Offset range must be in ldp/stp instruction's range.
2769   if (low_offset > max_offset || low_offset < min_offset) {
2770     return false;
2771   }
2772 
2773   if (merge_alignment_check(adr.base(), prev_size_in_bytes, cur_offset, prev_offset)) {
2774     return true;
2775   }
2776 
2777   return false;
2778 }
2779 
2780 // Merge current load/store with previous load/store into ldp/stp.
2781 void MacroAssembler::merge_ldst(Register rt,
2782                                 const Address &adr,
2783                                 size_t cur_size_in_bytes,
2784                                 bool is_store) {
2785 
2786   assert(ldst_can_merge(rt, adr, cur_size_in_bytes, is_store) == true, "cur and prev must be able to be merged.");
2787 
2788   Register rt_low, rt_high;
2789   address prev = pc() - NativeInstruction::instruction_size;
2790   NativeLdSt* prev_ldst = NativeLdSt_at(prev);
2791 
2792   int64_t offset;
2793 
2794   if (adr.offset() < prev_ldst->offset()) {
2795     offset = adr.offset();
2796     rt_low = rt;
2797     rt_high = prev_ldst->target();
2798   } else {
2799     offset = prev_ldst->offset();
2800     rt_low = prev_ldst->target();
2801     rt_high = rt;
2802   }
2803 
2804   Address adr_p = Address(prev_ldst->base(), offset);
2805   // Overwrite the previously generated binary.
2806   code_section()->set_end(prev);
2807 
2808   const int sz = prev_ldst->size_in_bytes();
2809   assert(sz == 8 || sz == 4, "only supports 64/32bit merging.");
2810   if (!is_store) {
2811     BLOCK_COMMENT("merged ldr pair");
2812     if (sz == 8) {


3019  *
3020  */
3021 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3022                                      Register z, Register zlen,
3023                                      Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3024                                      Register tmp5, Register tmp6, Register product_hi) {
3025 
3026   assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3027 
3028   const Register idx = tmp1;
3029   const Register kdx = tmp2;
3030   const Register xstart = tmp3;
3031 
3032   const Register y_idx = tmp4;
3033   const Register carry = tmp5;
3034   const Register product  = xlen;
3035   const Register x_xstart = zlen;  // reuse register
3036 
3037   // First Loop.
3038   //
3039   //  final static int64_t LONG_MASK = 0xffffffffL;
3040   //  int xstart = xlen - 1;
3041   //  int ystart = ylen - 1;
3042   //  int64_t carry = 0;
3043   //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3044   //    int64_t product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
3045   //    z[kdx] = (int)product;
3046   //    carry = product >>> 32;
3047   //  }
3048   //  z[xstart] = (int)carry;
3049   //
3050 
3051   movw(idx, ylen);      // idx = ylen;
3052   movw(kdx, zlen);      // kdx = xlen+ylen;
3053   mov(carry, zr);       // carry = 0;
3054 
3055   Label L_done;
3056 
3057   movw(xstart, xlen);
3058   subsw(xstart, xstart, 1);
3059   br(Assembler::MI, L_done);
3060 
3061   multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3062 
3063   Label L_second_loop;
3064   cbzw(kdx, L_second_loop);
3065 
3066   Label L_carry;
3067   subw(kdx, kdx, 1);
3068   cbzw(kdx, L_carry);
3069 
3070   strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
3071   lsr(carry, carry, 32);
3072   subw(kdx, kdx, 1);
3073 
3074   bind(L_carry);
3075   strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
3076 
3077   // Second and third (nested) loops.
3078   //
3079   // for (int i = xstart-1; i >= 0; i--) { // Second loop
3080   //   carry = 0;
3081   //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3082   //     int64_t product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3083   //                    (z[k] & LONG_MASK) + carry;
3084   //     z[k] = (int)product;
3085   //     carry = product >>> 32;
3086   //   }
3087   //   z[i] = (int)carry;
3088   // }
3089   //
3090   // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3091 
3092   const Register jdx = tmp1;
3093 
3094   bind(L_second_loop);
3095   mov(carry, zr);                // carry = 0;
3096   movw(jdx, ylen);               // j = ystart+1
3097 
3098   subsw(xstart, xstart, 1);      // i = xstart-1;
3099   br(Assembler::MI, L_done);
3100 
3101   str(z, Address(pre(sp, -4 * wordSize)));
3102 


3318 
3319     sub(len, len, 64);
3320     add(buf, buf, 8);
3321     cmn(len, 128);
3322     br(Assembler::NE, CRC_less64);
3323   BIND(L_exit);
3324     mvnw(crc, crc);
3325 }
3326 
3327 /**
3328  * @param crc   register containing existing CRC (32-bit)
3329  * @param buf   register pointing to input byte buffer (byte*)
3330  * @param len   register containing number of bytes
3331  * @param table register that will contain address of CRC table
3332  * @param tmp   scratch register
3333  */
3334 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
3335         Register table0, Register table1, Register table2, Register table3,
3336         Register tmp, Register tmp2, Register tmp3) {
3337   Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
3338   uint64_t offset;
3339 
3340   if (UseCRC32) {
3341       kernel_crc32_using_crc32(crc, buf, len, table0, table1, table2, table3);
3342       return;
3343   }
3344 
3345     mvnw(crc, crc);
3346 
3347     adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset);
3348     if (offset) add(table0, table0, offset);
3349     add(table1, table0, 1*256*sizeof(juint));
3350     add(table2, table0, 2*256*sizeof(juint));
3351     add(table3, table0, 3*256*sizeof(juint));
3352 
3353   if (UseNeon) {
3354       cmp(len, (u1)64);
3355       br(Assembler::LT, L_by16);
3356       eor(v16, T16B, v16, v16);
3357 
3358     Label L_fold;


3620   BIND(L_exit);
3621 }
3622 
3623 /**
3624  * @param crc   register containing existing CRC (32-bit)
3625  * @param buf   register pointing to input byte buffer (byte*)
3626  * @param len   register containing number of bytes
3627  * @param table register that will contain address of CRC table
3628  * @param tmp   scratch register
3629  */
3630 void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len,
3631         Register table0, Register table1, Register table2, Register table3,
3632         Register tmp, Register tmp2, Register tmp3) {
3633   kernel_crc32c_using_crc32c(crc, buf, len, table0, table1, table2, table3);
3634 }
3635 
3636 
3637 SkipIfEqual::SkipIfEqual(
3638     MacroAssembler* masm, const bool* flag_addr, bool value) {
3639   _masm = masm;
3640   uint64_t offset;
3641   _masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset);
3642   _masm->ldrb(rscratch1, Address(rscratch1, offset));
3643   _masm->cbzw(rscratch1, _label);
3644 }
3645 
3646 SkipIfEqual::~SkipIfEqual() {
3647   _masm->bind(_label);
3648 }
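
SkipIfEqual is an RAII guard: the constructor loads the flag byte and emits a branch over the guarded region, and the destructor binds the branch target. A hedged usage sketch (the flag name below is made up; real callers pass flags such as the DTrace probe flags):

  // Hypothetical usage: the code emitted inside the block is skipped
  // at run time whenever the flag byte is zero.
  {
    SkipIfEqual skip(masm, &SomeDiagnosticFlag, false);  // SomeDiagnosticFlag is made up
    // ... emit instrumentation here ...
  }  // ~SkipIfEqual binds the label the constructor branched to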
3649 
3650 void MacroAssembler::addptr(const Address &dst, int32_t src) {
3651   Address adr;
3652   switch(dst.getMode()) {
3653   case Address::base_plus_offset:
3654     // This is the expected mode, although we allow all the other
3655     // forms below.
3656     adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord);
3657     break;
3658   default:
3659     lea(rscratch2, dst);
3660     adr = Address(rscratch2);
3661     break;
3662   }
3663   ldr(rscratch1, adr);
3664   add(rscratch1, rscratch1, src);
3665   str(rscratch1, adr);
3666 }
3667 
3668 void MacroAssembler::cmpptr(Register src1, Address src2) {
3669   uint64_t offset;
3670   adrp(rscratch1, src2, offset);
3671   ldr(rscratch1, Address(rscratch1, offset));
3672   cmp(src1, rscratch1);
3673 }
3674 
3675 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
3676   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3677   bs->obj_equals(this, obj1, obj2);
3678 }
3679 
3680 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
3681   load_method_holder(rresult, rmethod);
3682   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
3683 }
3684 
3685 void MacroAssembler::load_method_holder(Register holder, Register method) {
3686   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
3687   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
3688   ldr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
3689 }


4311 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
4312   ldr(dest, Address(rthread, Thread::polling_page_offset()));
4313 }
4314 
4315 // Move the address of the polling page into r, then read the polling
4316 // page.
4317 address MacroAssembler::fetch_and_read_polling_page(Register r, relocInfo::relocType rtype) {
4318   get_polling_page(r, rtype);
4319   return read_polling_page(r, rtype);
4320 }
4321 
4322 // Read the polling page.  The address of the polling page must
4323 // already be in r.
4324 address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
4325   InstructionMark im(this);
4326   code_section()->relocate(inst_mark(), rtype);
4327   ldrw(zr, Address(r, 0));
4328   return inst_mark();
4329 }
4330 
4331 void MacroAssembler::adrp(Register reg1, const Address &dest, uint64_t &byte_offset) {
4332   relocInfo::relocType rtype = dest.rspec().reloc()->type();
4333   uint64_t low_page = (uint64_t)CodeCache::low_bound() >> 12;
4334   uint64_t high_page = (uint64_t)(CodeCache::high_bound() - 1) >> 12;
4335   uint64_t dest_page = (uint64_t)dest.target() >> 12;
4336   int64_t offset_low = dest_page - low_page;
4337   int64_t offset_high = dest_page - high_page;
4338 
4339   assert(is_valid_AArch64_address(dest.target()), "bad address");
4340   assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
4341 
4342   InstructionMark im(this);
4343   code_section()->relocate(inst_mark(), dest.rspec());
4344   // 8143067: Ensure that the adrp can reach the dest from anywhere within
4345   // the code cache so that if it is relocated we know it will still reach
4346   if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
4347     _adrp(reg1, dest.target());
4348   } else {
4349     uint64_t target = (uint64_t)dest.target();
4350     uint64_t adrp_target
4351       = (target & 0xffffffffUL) | ((uint64_t)pc() & 0xffff00000000UL);
4352 
4353     _adrp(reg1, (address)adrp_target);
4354     movk(reg1, target >> 32, 32);
4355   }
4356   byte_offset = (uint64_t)dest.target() & 0xfff;
4357 }
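
The reach test above works in 4KB pages: a single adrp is used only when the destination page is within the signed 21-bit page offset (2^20 pages, i.e. +/-4GB) of both ends of the code cache, so the instruction still reaches after any relocation; otherwise a movk supplies the high 16 bits. A standalone sketch of just the reach test:

  #include <cstdint>

  // Can a single ADRP placed anywhere in [low_bound, high_bound)
  // reach the destination page?  (Mirrors the test in adrp above.)
  bool adrp_reaches_everywhere(uint64_t low_bound, uint64_t high_bound, uint64_t dest) {
    int64_t low_page    = (int64_t)(low_bound >> 12);
    int64_t high_page   = (int64_t)((high_bound - 1) >> 12);
    int64_t dest_page   = (int64_t)(dest >> 12);
    int64_t offset_low  = dest_page - low_page;    // offset seen from the lowest pc
    int64_t offset_high = dest_page - high_page;   // offset seen from the highest pc
    return offset_high >= -(1 << 20) && offset_low < (1 << 20);
  }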
4358 
4359 void MacroAssembler::load_byte_map_base(Register reg) {
4360   CardTable::CardValue* byte_map_base =
4361     ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
4362 
4363   if (is_valid_AArch64_address((address)byte_map_base)) {
4364     // Strictly speaking the byte_map_base isn't an address at all,
4365     // and it might even be negative.
4366     uint64_t offset;
4367     adrp(reg, ExternalAddress((address)byte_map_base), offset);
4368     // We expect offset to be zero with most collectors.
4369     if (offset != 0) {
4370       add(reg, reg, offset);
4371     }
4372   } else {
4373     mov(reg, (uint64_t)byte_map_base);
4374   }
4375 }
4376 
4377 void MacroAssembler::build_frame(int framesize) {
4378   assert(framesize > 0, "framesize must be > 0");
4379   if (framesize < ((1 << 9) + 2 * wordSize)) {
4380     sub(sp, sp, framesize);
4381     stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4382     if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
4383   } else {
4384     stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
4385     if (PreserveFramePointer) mov(rfp, sp);
4386     if (framesize < ((1 << 12) + 2 * wordSize))


4391     }
4392   }
4393 }
4394 
4395 void MacroAssembler::remove_frame(int framesize) {
4396   assert(framesize > 0, "framesize must be > 0");
4397   if (framesize < ((1 << 9) + 2 * wordSize)) {
4398     ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4399     add(sp, sp, framesize);
4400   } else {
4401     if (framesize < ((1 << 12) + 2 * wordSize))
4402       add(sp, sp, framesize - 2 * wordSize);
4403     else {
4404       mov(rscratch1, framesize - 2 * wordSize);
4405       add(sp, sp, rscratch1);
4406     }
4407     ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
4408   }
4409 }
4410 

4411 // This method checks whether the provided byte array contains a byte with the highest bit set.
4412 void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
4413     // The simple and most common case of an aligned small array that is not at
4414     // the end of a memory page is handled here. All other cases are in the stub.
4415     Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
4416     const uint64_t UPPER_BIT_MASK=0x8080808080808080;
4417     assert_different_registers(ary1, len, result);
4418 
4419     cmpw(len, 0);
4420     br(LE, SET_RESULT);
4421     cmpw(len, 4 * wordSize);
4422     br(GE, STUB_LONG); // size > 32 then go to stub
4423 
4424     int shift = 64 - exact_log2(os::vm_page_size());
4425     lsl(rscratch1, ary1, shift);
4426     mov(rscratch2, (size_t)(4 * wordSize) << shift);
4427     adds(rscratch2, rscratch1, rscratch2);  // At end of page?
4428     br(CS, STUB); // at the end of page then go to stub
4429     subs(len, len, wordSize);
4430     br(LT, END);


4788   for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
4789     Label l;
4790     tbz(cnt, exact_log2(i), l);
4791     for (int j = 0; j < i; j += 2) {
4792       stp(zr, zr, post(ptr, 16));
4793     }
4794     bind(l);
4795   }
4796   {
4797     Label l;
4798     tbz(cnt, 0, l);
4799     str(zr, Address(ptr));
4800     bind(l);
4801   }
4802   BLOCK_COMMENT("} zero_words");
4803 }
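
The tail above zeroes the residual count by binary decomposition: for each bit of cnt below the unrolled block size it conditionally stores that many zero words with stp, and a final str covers the low bit. The same idea in plain C++ (a sketch of the semantics, not the generated code):

  #include <cstdint>
  #include <cstring>

  // Semantics of the tbz/stp tail: for every set bit of cnt below
  // block_size, clear that many 8-byte words and advance the pointer.
  void zero_tail(uint64_t *ptr, unsigned cnt, unsigned block_size) {
    for (unsigned i = block_size >> 1; i > 1; i >>= 1) {
      if (cnt & i) { std::memset(ptr, 0, i * sizeof(uint64_t)); ptr += i; }
    }
    if (cnt & 1) *ptr = 0;
  }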
4804 
4805 // base:         Address of a buffer to be zeroed, 8 bytes aligned.
4806 // cnt:          Immediate count in HeapWords.
4807 #define SmallArraySize (18 * BytesPerLong)
4808 void MacroAssembler::zero_words(Register base, uint64_t cnt)
4809 {
4810   BLOCK_COMMENT("zero_words {");
4811   int i = cnt & 1;  // store any odd word to start
4812   if (i) str(zr, Address(base));
4813 
4814   if (cnt <= SmallArraySize / BytesPerLong) {
4815     for (; i < (int)cnt; i += 2)
4816       stp(zr, zr, Address(base, i * wordSize));
4817   } else {
4818     const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
4819     int remainder = cnt % (2 * unroll);
4820     for (; i < remainder; i += 2)
4821       stp(zr, zr, Address(base, i * wordSize));
4822 
4823     Label loop;
4824     Register cnt_reg = rscratch1;
4825     Register loop_base = rscratch2;
4826     cnt = cnt - remainder;
4827     mov(cnt_reg, cnt);
4828     // adjust base and prebias by -2 * wordSize so we can pre-increment

