53 #endif
54 #ifdef COMPILER2
55 #include "oops/oop.hpp"
56 #include "opto/compile.hpp"
57 #include "opto/node.hpp"
58 #include "opto/output.hpp"
59 #endif
60
61 #ifdef PRODUCT
62 #define BLOCK_COMMENT(str) /* nothing */
63 #else
64 #define BLOCK_COMMENT(str) block_comment(str)
65 #endif
66 #define STOP(str) stop(str);
67 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
68
69 // Patch any kind of instruction; there may be several instructions.
70 // Return the total length (in bytes) of the instructions.
71 int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
72 int instructions = 1;
73 assert((uint64_t)target < ((uint64_t)1 << 48), "48-bit overflow in address constant");
74 int64_t offset = (target - branch) >> 2;
75 unsigned insn = *(unsigned*)branch;
76 if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
77 // Load register (literal)
78 Instruction_aarch64::spatch(branch, 23, 5, offset);
79 } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
80 // Unconditional branch (immediate)
81 Instruction_aarch64::spatch(branch, 25, 0, offset);
82 } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
83 // Conditional branch (immediate)
84 Instruction_aarch64::spatch(branch, 23, 5, offset);
85 } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
86 // Compare & branch (immediate)
87 Instruction_aarch64::spatch(branch, 23, 5, offset);
88 } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
89 // Test & branch (immediate)
90 Instruction_aarch64::spatch(branch, 18, 5, offset);
91 } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
92 // PC-rel. addressing
93 offset = target-branch;
94 int shift = Instruction_aarch64::extract(insn, 31, 31);
95 if (shift) {
96 uint64_t dest = (uint64_t)target;
97 uint64_t pc_page = (uint64_t)branch >> 12;
98 uint64_t adr_page = (uint64_t)target >> 12;
99 unsigned offset_lo = dest & 0xfff;
100 offset = adr_page - pc_page;
101
102 // We handle 4 types of PC relative addressing
103 // 1 - adrp Rx, target_page
104 // ldr/str Ry, [Rx, #offset_in_page]
105 // 2 - adrp Rx, target_page
106 // add Ry, Rx, #offset_in_page
107 // 3 - adrp Rx, target_page (page aligned reloc, offset == 0)
108 // movk Rx, #imm16<<32
109 // 4 - adrp Rx, target_page (page aligned reloc, offset == 0)
110 // In the first 3 cases we must check that Rx is the same in the adrp and the
111 // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end
112 // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened
113 // to be followed by a random unrelated ldr/str, add or movk instruction.
114 //
115 unsigned insn2 = ((unsigned*)branch)[1];
116 if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
117 Instruction_aarch64::extract(insn, 4, 0) ==
118 Instruction_aarch64::extract(insn2, 9, 5)) {
119 // Load/store register (unsigned immediate)
120 unsigned size = Instruction_aarch64::extract(insn2, 31, 30);
121 Instruction_aarch64::patch(branch + sizeof (unsigned),
122 21, 10, offset_lo >> size);
123 guarantee(((dest >> size) << size) == dest, "misaligned target");
124 instructions = 2;
125 } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
126 Instruction_aarch64::extract(insn, 4, 0) ==
127 Instruction_aarch64::extract(insn2, 4, 0)) {
128 // add (immediate)
129 Instruction_aarch64::patch(branch + sizeof (unsigned),
130 21, 10, offset_lo);
131 instructions = 2;
132 } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
133 Instruction_aarch64::extract(insn, 4, 0) ==
134 Instruction_aarch64::extract(insn2, 4, 0)) {
135 // movk #imm16<<32
136 Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
137 int64_t dest = ((int64_t)target & 0xffffffffL) | ((int64_t)branch & 0xffff00000000L);
138 int64_t pc_page = (int64_t)branch >> 12;
139 int64_t adr_page = (int64_t)dest >> 12;
140 offset = adr_page - pc_page;
141 instructions = 2;
142 }
143 }
144 int offset_lo = offset & 3;
145 offset >>= 2;
146 Instruction_aarch64::spatch(branch, 23, 5, offset);
147 Instruction_aarch64::patch(branch, 30, 29, offset_lo);
148 } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
149 uint64_t dest = (uint64_t)target;
150 // Move wide constant
151 assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
152 assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
153 Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
154 Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
155 Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
156 assert(target_addr_for_insn(branch) == target, "should be");
157 instructions = 3;
158 } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
159 Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
160 // nothing to do
161 assert(target == 0, "did not expect to relocate target for polling page load");
162 } else {
163 ShouldNotReachHere();
164 }
165 return instructions * NativeInstruction::instruction_size;
166 }
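// The Instruction_aarch64::patch/spatch helpers used above rewrite a
// bitfield of one instruction word in place. A minimal sketch of the
// unsigned variant, assuming the real helper also verifies that val fits
// in the field:
//
//   static void patch_sketch(address a, int msb, int lsb, uint32_t val) {
//     int nbits = msb - lsb + 1;
//     uint32_t mask = ((1u << nbits) - 1) << lsb;     // field to overwrite
//     uint32_t insn = *(uint32_t *)a;
//     insn = (insn & ~mask) | ((val << lsb) & mask);  // insert new field
//     *(uint32_t *)a = insn;
//   }
//
// spatch is the signed counterpart: it checks that val sign-extends from
// nbits bits and then stores the low nbits the same way.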
167
168 int MacroAssembler::patch_oop(address insn_addr, address o) {
169 int instructions;
188 Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
189 instructions = 3;
190 }
191 return instructions * NativeInstruction::instruction_size;
192 }
193
194 int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
195 // Metadata pointers are either narrow (32 bits) or wide (48 bits).
196 // We encode narrow ones by setting the upper 16 bits in the first
197 // instruction.
198 NativeInstruction *insn = nativeInstruction_at(insn_addr);
199 assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
200 nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
201
202 Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
203 Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
204 return 2 * NativeInstruction::instruction_size;
205 }
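// e.g. for n == 0x00123456 the movz immediate field is patched to 0x0012
// (its hw field shifts that left by 16) and the movk gets 0x3456, so at
// runtime the register holds the full 32-bit narrow klass again.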
206
207 address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
208 int64_t offset = 0;
209 if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
210 // Load register (literal)
211 offset = Instruction_aarch64::sextract(insn, 23, 5);
212 return address(((uint64_t)insn_addr + (offset << 2)));
213 } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
214 // Unconditional branch (immediate)
215 offset = Instruction_aarch64::sextract(insn, 25, 0);
216 } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
217 // Conditional branch (immediate)
218 offset = Instruction_aarch64::sextract(insn, 23, 5);
219 } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
220 // Compare & branch (immediate)
221 offset = Instruction_aarch64::sextract(insn, 23, 5);
222 } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
223 // Test & branch (immediate)
224 offset = Instruction_aarch64::sextract(insn, 18, 5);
225 } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
226 // PC-rel. addressing
227 offset = Instruction_aarch64::extract(insn, 30, 29);
228 offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2;
255 return address(target_page + (byte_offset << size));
256 } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
257 Instruction_aarch64::extract(insn, 4, 0) ==
258 Instruction_aarch64::extract(insn2, 4, 0)) {
259 // add (immediate)
260 unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
261 return address(target_page + byte_offset);
262 } else {
263 if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
264 Instruction_aarch64::extract(insn, 4, 0) ==
265 Instruction_aarch64::extract(insn2, 4, 0)) {
266 target_page = (target_page & 0xffffffff) |
267 ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32);
268 }
269 return (address)target_page;
270 }
271 } else {
272 ShouldNotReachHere();
273 }
274 } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
275 uint32_t *insns = (uint32_t *)insn_addr;
276 // Move wide constant: movz, movk, movk. See movptr().
277 assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
278 assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
279 return address(uint64_t(Instruction_aarch64::extract(insns[0], 20, 5))
280 + (uint64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
281 + (uint64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
282 } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
283 Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
284 return 0;
285 } else {
286 ShouldNotReachHere();
287 }
288 return address(((uint64_t)insn_addr + (offset << 2)));
289 }
290
291 void MacroAssembler::safepoint_poll(Label& slow_path) {
292 ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));
293 tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
294 }
295
296 // Just like safepoint_poll, but use an acquiring load for thread-
297 // local polling.
298 //
299 // We need an acquire here to ensure that any subsequent load of the
300 // global SafepointSynchronize::_state flag is ordered after this load
301 // of the local Thread::_polling page. We don't want this poll to
372 }
373
374 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
375 Register last_java_fp,
376 Label &L,
377 Register scratch) {
378 if (L.is_bound()) {
379 set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch);
380 } else {
381 InstructionMark im(this);
382 L.add_patch_at(code(), locator());
383 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, scratch);
384 }
385 }
386
387 void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
388 assert(ReservedCodeCacheSize < 4*G, "branch out of range");
389 assert(CodeCache::find_blob(entry.target()) != NULL,
390 "destination of far call not found in code cache");
391 if (far_branches()) {
392 uint64_t offset;
393 // We can use ADRP here because we know that the total size of
394 // the code cache cannot exceed 2Gb.
395 adrp(tmp, entry, offset);
396 add(tmp, tmp, offset);
397 if (cbuf) cbuf->set_insts_mark();
398 blr(tmp);
399 } else {
400 if (cbuf) cbuf->set_insts_mark();
401 bl(entry);
402 }
403 }
404
405 void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
406 assert(ReservedCodeCacheSize < 4*G, "branch out of range");
407 assert(CodeCache::find_blob(entry.target()) != NULL,
408 "destination of far jump not found in code cache");
409 if (far_branches()) {
410 uint64_t offset;
411 // We can use ADRP here because we know that the total size of
412 // the code cache cannot exceed 2Gb.
413 adrp(tmp, entry, offset);
414 add(tmp, tmp, offset);
415 if (cbuf) cbuf->set_insts_mark();
416 br(tmp);
417 } else {
418 if (cbuf) cbuf->set_insts_mark();
419 b(entry);
420 }
421 }
422
423 void MacroAssembler::reserved_stack_check() {
424 // Test if the reserved stack zone needs to be enabled.
425 Label no_reserved_zone_enabling;
426
427 ldr(rscratch1, Address(rthread, JavaThread::reserved_stack_activation_offset()));
428 cmp(sp, rscratch1);
429 br(Assembler::LO, no_reserved_zone_enabling);
430
807 isb();
808 mov_metadata(rmethod, (Metadata*)NULL);
809
810 // Jump to the entry point of the i2c stub.
811 movptr(rscratch1, 0);
812 br(rscratch1);
813 }
814
815 void MacroAssembler::c2bool(Register x) {
816 // implements x == 0 ? 0 : 1
817 // note: must only look at least-significant byte of x
818 // since C-style booleans are stored in one byte
819 // only! (was bug)
820 tst(x, 0xff);
821 cset(x, Assembler::NE);
822 }
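// e.g. x == 0x0100 yields 0 (its low byte is zero), x == 0x01 yields 1.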
823
824 address MacroAssembler::ic_call(address entry, jint method_index) {
825 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
826 // address const_ptr = long_constant((jlong)Universe::non_oop_word());
827 // uint64_t offset;
828 // ldr_constant(rscratch2, const_ptr);
829 movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
830 return trampoline_call(Address(entry, rh));
831 }
832
833 // Implementation of call_VM versions
834
835 void MacroAssembler::call_VM(Register oop_result,
836 address entry_point,
837 bool check_exceptions) {
838 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
839 }
840
841 void MacroAssembler::call_VM(Register oop_result,
842 address entry_point,
843 Register arg_1,
844 bool check_exceptions) {
845 pass_arg1(this, arg_1);
846 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
847 }
1474 MacroAssembler::call_VM_leaf_base(entry_point, 4);
1475 }
1476
1477 void MacroAssembler::null_check(Register reg, int offset) {
1478 if (needs_explicit_null_check(offset)) {
1479 // provoke OS NULL exception if reg = NULL by
1480 // accessing M[reg] w/o changing any registers
1481 // NOTE: this is plenty to provoke a segv
1482 ldr(zr, Address(reg));
1483 } else {
1484 // nothing to do, (later) access of M[reg + offset]
1485 // will provoke OS NULL exception if reg = NULL
1486 }
1487 }
1488
1489 // MacroAssembler protected routines needed to implement
1490 // public methods
1491
1492 void MacroAssembler::mov(Register r, Address dest) {
1493 code_section()->relocate(pc(), dest.rspec());
1494 uint64_t imm64 = (uint64_t)dest.target();
1495 movptr(r, imm64);
1496 }
1497
1498 // Move a constant pointer into r. In AArch64 mode the virtual
1499 // address space is 48 bits in size, so we only need three
1500 // instructions to create a patchable instruction sequence that can
1501 // reach anywhere.
1502 void MacroAssembler::movptr(Register r, uintptr_t imm64) {
1503 #ifndef PRODUCT
1504 {
1505 char buffer[64];
1506 snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
1507 block_comment(buffer);
1508 }
1509 #endif
1510 assert(imm64 < (1ull << 48), "48-bit overflow in address constant");
1511 movz(r, imm64 & 0xffff);
1512 imm64 >>= 16;
1513 movk(r, imm64 & 0xffff, 16);
1514 imm64 >>= 16;
1515 movk(r, imm64 & 0xffff, 32);
1516 }
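// e.g. movptr(r0, 0x123456789abcUL) emits the patchable sequence
//   movz r0, #0x9abc
//   movk r0, #0x5678, lsl #16
//   movk r0, #0x1234, lsl #32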
1517
1518 // Macro to mov replicated immediate to vector register.
1519 // Vd will get the following values for different arrangements in T
1520 // imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh
1521 // imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
1522 // imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh
1523 // imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
1524 // imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
1525 // imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
1526 // T1D/T2D: invalid
1527 void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32) {
1528 assert(T != T1D && T != T2D, "invalid arrangement");
1529 if (T == T8B || T == T16B) {
1530 assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
1531 movi(Vd, T, imm32 & 0xff, 0);
1532 return;
1533 }
1534 uint32_t nimm32 = ~imm32;
1535 if (T == T4H || T == T8H) {
1536 assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
1537 imm32 &= 0xffff;
1538 nimm32 &= 0xffff;
1539 }
1540 uint32_t x = imm32;
1541 int movi_cnt = 0;
1542 int movn_cnt = 0;
1543 while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
1544 x = nimm32;
1545 while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
1546 if (movn_cnt < movi_cnt) imm32 = nimm32;
1547 unsigned lsl = 0;
1548 while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1549 if (movn_cnt < movi_cnt)
1550 mvni(Vd, T, imm32 & 0xff, lsl);
1551 else
1552 movi(Vd, T, imm32 & 0xff, lsl);
1553 imm32 >>= 8; lsl += 8;
1554 while (imm32) {
1555 while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1556 if (movn_cnt < movi_cnt)
1557 bici(Vd, T, imm32 & 0xff, lsl);
1558 else
1559 orri(Vd, T, imm32 & 0xff, lsl);
1560 lsl += 8; imm32 >>= 8;
1561 }
1562 }
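// e.g. mov(v0, T4S, 0x00ab00cd): imm32 has two non-zero bytes while its
// complement has four, so the MOVI path wins and we emit
//   movi v0.4s, #0xcd
//   orr  v0.4s, #0xab, lsl #16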
1563
1564 void MacroAssembler::mov_immediate64(Register dst, uint64_t imm64)
1565 {
1566 #ifndef PRODUCT
1567 {
1568 char buffer[64];
1569 snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
1570 block_comment(buffer);
1571 }
1572 #endif
1573 if (operand_valid_for_logical_immediate(false, imm64)) {
1574 orr(dst, zr, imm64);
1575 } else {
1576 // we can use a combination of MOVZ or MOVN with
1577 // MOVK to build up the constant
1578 uint64_t imm_h[4];
1579 int zero_count = 0;
1580 int neg_count = 0;
1581 int i;
1582 for (i = 0; i < 4; i++) {
1583 imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
1584 if (imm_h[i] == 0) {
1585 zero_count++;
1586 } else if (imm_h[i] == 0xffffL) {
1587 neg_count++;
1588 }
1589 }
1590 if (zero_count == 4) {
1591 // one MOVZ will do
1592 movz(dst, 0);
1593 } else if (neg_count == 4) {
1594 // one MOVN will do
1595 movn(dst, 0);
1596 } else if (zero_count == 3) {
1597 for (i = 0; i < 4; i++) {
1598 if (imm_h[i] != 0L) {
1599 movz(dst, (uint32_t)imm_h[i], (i << 4));
1600 break;
1601 }
1602 }
1603 } else if (neg_count == 3) {
1604 // one MOVN will do
1605 for (int i = 0; i < 4; i++) {
1606 if (imm_h[i] != 0xffffL) {
1607 movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1608 break;
1609 }
1610 }
1611 } else if (zero_count == 2) {
1612 // one MOVZ and one MOVK will do
1613 for (i = 0; i < 3; i++) {
1614 if (imm_h[i] != 0L) {
1615 movz(dst, (uint32_t)imm_h[i], (i << 4));
1616 i++;
1617 break;
1618 }
1619 }
1620 for (;i < 4; i++) {
1621 if (imm_h[i] != 0L) {
1622 movk(dst, (uint32_t)imm_h[i], (i << 4));
1623 }
1624 }
1625 } else if (neg_count == 2) {
1626 // one MOVN and one MOVK will do
1627 for (i = 0; i < 4; i++) {
1628 if (imm_h[i] != 0xffffL) {
1629 movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1630 i++;
1631 break;
1632 }
1633 }
1634 for (;i < 4; i++) {
1635 if (imm_h[i] != 0xffffL) {
1636 movk(dst, (uint32_t)imm_h[i], (i << 4));
1637 }
1638 }
1639 } else if (zero_count == 1) {
1640 // one MOVZ and two MOVKs will do
1641 for (i = 0; i < 4; i++) {
1642 if (imm_h[i] != 0L) {
1643 movz(dst, (uint32_t)imm_h[i], (i << 4));
1644 i++;
1645 break;
1646 }
1647 }
1648 for (;i < 4; i++) {
1649 if (imm_h[i] != 0x0L) {
1650 movk(dst, (uint32_t)imm_h[i], (i << 4));
1651 }
1652 }
1653 } else if (neg_count == 1) {
1654 // one MOVN and two MOVKs will do
1655 for (i = 0; i < 4; i++) {
1656 if (imm_h[i] != 0xffffL) {
1657 movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1658 i++;
1659 break;
1660 }
1661 }
1662 for (;i < 4; i++) {
1663 if (imm_h[i] != 0xffffL) {
1664 movk(dst, (uint32_t)imm_h[i], (i << 4));
1665 }
1666 }
1667 } else {
1668 // use a MOVZ and 3 MOVKs (makes it easier to debug)
1669 movz(dst, (uint32_t)imm_h[0], 0);
1670 for (i = 1; i < 4; i++) {
1671 movk(dst, (uint32_t)imm_h[i], (i << 4));
1672 }
1673 }
1674 }
1675 }
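// e.g. imm64 == 0x0000abcd00001234 has two zero halfwords (zero_count == 2),
// so one MOVZ and one MOVK suffice:
//   movz dst, #0x1234
//   movk dst, #0xabcd, lsl #32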
1676
1677 void MacroAssembler::mov_immediate32(Register dst, uint32_t imm32)
1678 {
1679 #ifndef PRODUCT
1680 {
1681 char buffer[64];
1682 snprintf(buffer, sizeof(buffer), "0x%" PRIX32, imm32);
1683 block_comment(buffer);
1684 }
1685 #endif
1686 if (operand_valid_for_logical_immediate(true, imm32)) {
1687 orrw(dst, zr, imm32);
1688 } else {
1689 // we can use MOVZ, MOVN or two calls to MOVK to build up the
1690 // constant
1691 uint32_t imm_h[2];
1692 imm_h[0] = imm32 & 0xffff;
1693 imm_h[1] = ((imm32 >> 16) & 0xffff);
1694 if (imm_h[0] == 0) {
1695 movzw(dst, imm_h[1], 16);
1696 } else if (imm_h[0] == 0xffff) {
1697 movnw(dst, imm_h[1] ^ 0xffff, 16);
1698 } else if (imm_h[1] == 0) {
1699 movzw(dst, imm_h[0], 0);
1700 } else if (imm_h[1] == 0xffff) {
1701 movnw(dst, imm_h[0] ^ 0xffff, 0);
1702 } else {
1703 // use a MOVZ and MOVK (makes it easier to debug)
1704 movzw(dst, imm_h[0], 0);
1705 movkw(dst, imm_h[1], 16);
1706 }
1707 }
1708 }
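// e.g. imm32 == 0xffff1234 has imm_h[1] == 0xffff, so a single
//   movnw dst, #0xedcb
// suffices, since ~0x0000edcb == 0xffff1234 in 32 bits.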
1709
1710 // Form an address from base + offset in Rd. Rd may or may
1711 // not actually be used: you must use the Address that is returned.
1712 // It is up to you to ensure that the shift provided matches the size
1713 // of your data.
1714 Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) {
1715 if (Address::offset_ok_for_immed(byte_offset, shift))
1716 // It fits; no need for any heroics
1717 return Address(base, byte_offset);
1718
1719 // Don't do anything clever with negative or misaligned offsets
1720 unsigned mask = (1 << shift) - 1;
1721 if (byte_offset < 0 || byte_offset & mask) {
1722 mov(Rd, byte_offset);
1723 add(Rd, base, Rd);
1724 return Address(Rd);
1725 }
1726
1727 // See if we can do this with two 12-bit offsets
1728 {
1729 unsigned long word_offset = byte_offset >> shift;
1730 unsigned long masked_offset = word_offset & 0xfff000;
1731 if (Address::offset_ok_for_immed(word_offset - masked_offset, 0)
1732 && Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
1733 add(Rd, base, masked_offset << shift);
1734 word_offset -= masked_offset;
1735 return Address(Rd, word_offset << shift);
1736 }
1737 }
1738
1739 // Do it the hard way
1740 mov(Rd, byte_offset);
1741 add(Rd, base, Rd);
1742 return Address(Rd);
1743 }
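// e.g. form_address(rscratch1, r1, 0x21008, 3): the scaled offset 0x4201
// does not fit in 12 bits, but it splits cleanly into two 12-bit parts,
// so we emit
//   add rscratch1, r1, #0x20000
// and return Address(rscratch1, 0x1008), whose scaled offset 0x201 fits.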
1744
1745 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
1746 if (UseLSE) {
1747 mov(tmp, 1);
1748 ldadd(Assembler::word, tmp, zr, counter_addr);
1749 return;
1750 }
1951
1952 void MacroAssembler::decrementw(Register reg, int value)
1953 {
1954 if (value < 0) { incrementw(reg, -value); return; }
1955 if (value == 0) { return; }
1956 if (value < (1 << 12)) { subw(reg, reg, value); return; }
1957 /* else */ {
1958 guarantee(reg != rscratch2, "invalid dst for register decrement");
1959 movw(rscratch2, (unsigned)value);
1960 subw(reg, reg, rscratch2);
1961 }
1962 }
1963
1964 void MacroAssembler::decrement(Register reg, int value)
1965 {
1966 if (value < 0) { increment(reg, -value); return; }
1967 if (value == 0) { return; }
1968 if (value < (1 << 12)) { sub(reg, reg, value); return; }
1969 /* else */ {
1970 assert(reg != rscratch2, "invalid dst for register decrement");
1971 mov(rscratch2, (unsigned long)value);
1972 sub(reg, reg, rscratch2);
1973 }
1974 }
1975
1976 void MacroAssembler::decrementw(Address dst, int value)
1977 {
1978 assert(!dst.uses(rscratch1), "invalid dst for address decrement");
1979 if (dst.getMode() == Address::literal) {
1980 assert(abs(value) < (1 << 12), "invalid value and address mode combination");
1981 lea(rscratch2, dst);
1982 dst = Address(rscratch2);
1983 }
1984 ldrw(rscratch1, dst);
1985 decrementw(rscratch1, value);
1986 strw(rscratch1, dst);
1987 }
1988
1989 void MacroAssembler::decrement(Address dst, int value)
1990 {
1991 assert(!dst.uses(rscratch1), "invalid address for decrement");
2583 tty->print_cr("r19 = 0x%016lx", regs[19]);
2584 tty->print_cr("r20 = 0x%016lx", regs[20]);
2585 tty->print_cr("r21 = 0x%016lx", regs[21]);
2586 tty->print_cr("r22 = 0x%016lx", regs[22]);
2587 tty->print_cr("r23 = 0x%016lx", regs[23]);
2588 tty->print_cr("r24 = 0x%016lx", regs[24]);
2589 tty->print_cr("r25 = 0x%016lx", regs[25]);
2590 tty->print_cr("r26 = 0x%016lx", regs[26]);
2591 tty->print_cr("r27 = 0x%016lx", regs[27]);
2592 tty->print_cr("r28 = 0x%016lx", regs[28]);
2593 tty->print_cr("r30 = 0x%016lx", regs[30]);
2594 tty->print_cr("r31 = 0x%016lx", regs[31]);
2595 BREAKPOINT;
2596 }
2597 }
2598 fatal("DEBUG MESSAGE: %s", msg);
2599 }
2600
2601 void MacroAssembler::push_call_clobbered_registers() {
2602 int step = 4 * wordSize;
2603 push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2604 sub(sp, sp, step);
2605 mov(rscratch1, -step);
2606 // Push v0-v7, v16-v31.
2607 for (int i = 31; i>= 4; i -= 4) {
2608 if (i <= v7->encoding() || i >= v16->encoding())
2609 st1(as_FloatRegister(i-3), as_FloatRegister(i-2), as_FloatRegister(i-1),
2610 as_FloatRegister(i), T1D, Address(post(sp, rscratch1)));
2611 }
2612 st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2),
2613 as_FloatRegister(3), T1D, Address(sp));
2614 }
2615
2616 void MacroAssembler::pop_call_clobbered_registers() {
2617 for (int i = 0; i < 32; i += 4) {
2618 if (i <= v7->encoding() || i >= v16->encoding())
2619 ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2620 as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
2621 }
2622
2623 pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2624 }
2625
2626 void MacroAssembler::push_CPU_state(bool save_vectors) {
2627 int step = (save_vectors ? 8 : 4) * wordSize;
2628 push(0x3fffffff, sp); // integer registers except lr & sp
2629 mov(rscratch1, -step);
2630 sub(sp, sp, step);
2631 for (int i = 28; i >= 4; i -= 4) {
2632 st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2633 as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
2634 }
2635 st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
2636 }
2637
2638 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
2639 int step = (restore_vectors ? 8 : 4) * wordSize;
2640 for (int i = 0; i <= 28; i += 4)
2641 ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2642 as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
2643 pop(0x3fffffff, sp); // integer registers except lr & sp
2675 Register base = sp;
2676 if ((offset & (size-1)) && offset >= (1<<8)) {
2677 add(tmp, base, offset & ((1<<12)-1));
2678 base = tmp;
2679 offset &= -1u<<12;
2680 }
2681
2682 if (offset >= (1<<12) * size) {
2683 add(tmp, base, offset & (((1<<12)-1)<<12));
2684 base = tmp;
2685 offset &= ~(((1<<12)-1)<<12);
2686 }
2687
2688 return Address(base, offset);
2689 }
2690
2691 // Checks whether offset is aligned.
2692 // Returns true if it is, else false.
2693 bool MacroAssembler::merge_alignment_check(Register base,
2694 size_t size,
2695 long cur_offset,
2696 long prev_offset) const {
2697 if (AvoidUnalignedAccesses) {
2698 if (base == sp) {
2699 // Checks whether the lower offset is aligned to the size of a register pair.
2700 long pair_mask = size * 2 - 1;
2701 long offset = prev_offset > cur_offset ? cur_offset : prev_offset;
2702 return (offset & pair_mask) == 0;
2703 } else { // If base is not sp, we can't guarantee the access is aligned.
2704 return false;
2705 }
2706 } else {
2707 long mask = size - 1;
2708 // Load/store pair instruction only supports element size aligned offset.
2709 return (cur_offset & mask) == 0 && (prev_offset & mask) == 0;
2710 }
2711 }
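// e.g. with AvoidUnalignedAccesses and base == sp, 8-byte accesses at
// offsets 16 and 24 pass (the lower offset is 16-byte aligned), whereas
// offsets 8 and 16 fail because the resulting ldp/stp would straddle a
// 16-byte boundary.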
2712
2713 // Checks whether current and previous loads/stores can be merged.
2714 // Returns true if it can be merged, else false.
2715 bool MacroAssembler::ldst_can_merge(Register rt,
2716 const Address &adr,
2717 size_t cur_size_in_bytes,
2718 bool is_store) const {
2719 address prev = pc() - NativeInstruction::instruction_size;
2720 address last = code()->last_insn();
2721
2722 if (last == NULL || !nativeInstruction_at(last)->is_Imm_LdSt()) {
2723 return false;
2724 }
2725
2726 if (adr.getMode() != Address::base_plus_offset || prev != last) {
2727 return false;
2728 }
2729
2730 NativeLdSt* prev_ldst = NativeLdSt_at(prev);
2731 size_t prev_size_in_bytes = prev_ldst->size_in_bytes();
2732
2733 assert(prev_size_in_bytes == 4 || prev_size_in_bytes == 8, "only supports 64/32bit merging.");
2734 assert(cur_size_in_bytes == 4 || cur_size_in_bytes == 8, "only supports 64/32bit merging.");
2735
2736 if (cur_size_in_bytes != prev_size_in_bytes || is_store != prev_ldst->is_store()) {
2737 return false;
2738 }
2739
2740 long max_offset = 63 * prev_size_in_bytes;
2741 long min_offset = -64 * prev_size_in_bytes;
2742
2743 assert(prev_ldst->is_not_pre_post_index(), "merging pre-indexed or post-indexed accesses is not supported.");
2744
2745 // Only same base can be merged.
2746 if (adr.base() != prev_ldst->base()) {
2747 return false;
2748 }
2749
2750 long cur_offset = adr.offset();
2751 long prev_offset = prev_ldst->offset();
2752 size_t diff = abs(cur_offset - prev_offset);
2753 if (diff != prev_size_in_bytes) {
2754 return false;
2755 }
2756
2757 // The following cases cannot be merged:
2758 // ldr x2, [x2, #8]
2759 // ldr x3, [x2, #16]
2760 // or:
2761 // ldr x2, [x3, #8]
2762 // ldr x2, [x3, #16]
2763 // If t1 and t2 are the same in "ldp t1, t2, [xn, #imm]", we'll get SIGILL.
2764 if (!is_store && (adr.base() == prev_ldst->target() || rt == prev_ldst->target())) {
2765 return false;
2766 }
2767
2768 long low_offset = prev_offset > cur_offset ? cur_offset : prev_offset;
2769 // Offset range must be in ldp/stp instruction's range.
2770 if (low_offset > max_offset || low_offset < min_offset) {
2771 return false;
2772 }
2773
2774 if (merge_alignment_check(adr.base(), prev_size_in_bytes, cur_offset, prev_offset)) {
2775 return true;
2776 }
2777
2778 return false;
2779 }
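// A mergeable pair, by contrast:
//   ldr x2, [x3, #8]
//   ldr x4, [x3, #16]
// has the same base, equal sizes, adjacent offsets and distinct targets,
// so merge_ldst below can turn it into ldp x2, x4, [x3, #8].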
2780
2781 // Merge current load/store with previous load/store into ldp/stp.
2782 void MacroAssembler::merge_ldst(Register rt,
2783 const Address &adr,
2784 size_t cur_size_in_bytes,
2785 bool is_store) {
2786
2787 assert(ldst_can_merge(rt, adr, cur_size_in_bytes, is_store), "cur and prev must be able to be merged.");
2788
2789 Register rt_low, rt_high;
2790 address prev = pc() - NativeInstruction::instruction_size;
2791 NativeLdSt* prev_ldst = NativeLdSt_at(prev);
2792
2793 long offset;
2794
2795 if (adr.offset() < prev_ldst->offset()) {
2796 offset = adr.offset();
2797 rt_low = rt;
2798 rt_high = prev_ldst->target();
2799 } else {
2800 offset = prev_ldst->offset();
2801 rt_low = prev_ldst->target();
2802 rt_high = rt;
2803 }
2804
2805 Address adr_p = Address(prev_ldst->base(), offset);
2806 // Overwrite the previously generated instruction.
2807 code_section()->set_end(prev);
2808
2809 const int sz = prev_ldst->size_in_bytes();
2810 assert(sz == 8 || sz == 4, "only supports 64/32bit merging.");
2811 if (!is_store) {
2812 BLOCK_COMMENT("merged ldr pair");
2813 if (sz == 8) {
3020 *
3021 */
3022 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3023 Register z, Register zlen,
3024 Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3025 Register tmp5, Register tmp6, Register product_hi) {
3026
3027 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3028
3029 const Register idx = tmp1;
3030 const Register kdx = tmp2;
3031 const Register xstart = tmp3;
3032
3033 const Register y_idx = tmp4;
3034 const Register carry = tmp5;
3035 const Register product = xlen;
3036 const Register x_xstart = zlen; // reuse register
3037
3038 // First Loop.
3039 //
3040 // final static long LONG_MASK = 0xffffffffL;
3041 // int xstart = xlen - 1;
3042 // int ystart = ylen - 1;
3043 // long carry = 0;
3044 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3045 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
3046 // z[kdx] = (int)product;
3047 // carry = product >>> 32;
3048 // }
3049 // z[xstart] = (int)carry;
3050 //
3051
3052 movw(idx, ylen); // idx = ylen;
3053 movw(kdx, zlen); // kdx = xlen+ylen;
3054 mov(carry, zr); // carry = 0;
3055
3056 Label L_done;
3057
3058 movw(xstart, xlen);
3059 subsw(xstart, xstart, 1);
3060 br(Assembler::MI, L_done);
3061
3062 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3063
3064 Label L_second_loop;
3065 cbzw(kdx, L_second_loop);
3066
3067 Label L_carry;
3068 subw(kdx, kdx, 1);
3069 cbzw(kdx, L_carry);
3070
3071 strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
3072 lsr(carry, carry, 32);
3073 subw(kdx, kdx, 1);
3074
3075 bind(L_carry);
3076 strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
3077
3078 // Second and third (nested) loops.
3079 //
3080 // for (int i = xstart-1; i >= 0; i--) { // Second loop
3081 // carry = 0;
3082 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3083 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3084 // (z[k] & LONG_MASK) + carry;
3085 // z[k] = (int)product;
3086 // carry = product >>> 32;
3087 // }
3088 // z[i] = (int)carry;
3089 // }
3090 //
3091 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3092
3093 const Register jdx = tmp1;
3094
3095 bind(L_second_loop);
3096 mov(carry, zr); // carry = 0;
3097 movw(jdx, ylen); // j = ystart+1
3098
3099 subsw(xstart, xstart, 1); // i = xstart-1;
3100 br(Assembler::MI, L_done);
3101
3102 str(z, Address(pre(sp, -4 * wordSize)));
3103
3319
3320 sub(len, len, 64);
3321 add(buf, buf, 8);
3322 cmn(len, 128);
3323 br(Assembler::NE, CRC_less64);
3324 BIND(L_exit);
3325 mvnw(crc, crc);
3326 }
3327
3328 /**
3329 * @param crc register containing existing CRC (32-bit)
3330 * @param buf register pointing to input byte buffer (byte*)
3331 * @param len register containing number of bytes
3332 * @param table register that will contain address of CRC table
3333 * @param tmp scratch register
3334 */
3335 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
3336 Register table0, Register table1, Register table2, Register table3,
3337 Register tmp, Register tmp2, Register tmp3) {
3338 Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
3339 uint64_t offset;
3340
3341 if (UseCRC32) {
3342 kernel_crc32_using_crc32(crc, buf, len, table0, table1, table2, table3);
3343 return;
3344 }
3345
3346 mvnw(crc, crc);
3347
3348 adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset);
3349 if (offset) add(table0, table0, offset);
3350 add(table1, table0, 1*256*sizeof(juint));
3351 add(table2, table0, 2*256*sizeof(juint));
3352 add(table3, table0, 3*256*sizeof(juint));
3353
3354 if (UseNeon) {
3355 cmp(len, (u1)64);
3356 br(Assembler::LT, L_by16);
3357 eor(v16, T16B, v16, v16);
3358
3359 Label L_fold;
3621 BIND(L_exit);
3622 }
3623
3624 /**
3625 * @param crc register containing existing CRC (32-bit)
3626 * @param buf register pointing to input byte buffer (byte*)
3627 * @param len register containing number of bytes
3628 * @param table register that will contain address of CRC table
3629 * @param tmp scratch register
3630 */
3631 void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len,
3632 Register table0, Register table1, Register table2, Register table3,
3633 Register tmp, Register tmp2, Register tmp3) {
3634 kernel_crc32c_using_crc32c(crc, buf, len, table0, table1, table2, table3);
3635 }
3636
3637
3638 SkipIfEqual::SkipIfEqual(
3639 MacroAssembler* masm, const bool* flag_addr, bool value) {
3640 _masm = masm;
3641 uint64_t offset;
3642 _masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset);
3643 _masm->ldrb(rscratch1, Address(rscratch1, offset));
3644 _masm->cbzw(rscratch1, _label);
3645 }
3646
3647 SkipIfEqual::~SkipIfEqual() {
3648 _masm->bind(_label);
3649 }
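// Usage sketch (assuming some develop-time bool flag Foo; any flag address
// works the same way): instructions emitted inside the scope are branched
// over when the flag byte loaded above is zero.
//
//   {
//     SkipIfEqual skip(_masm, &Foo, false);
//     __ ...   // emitted code runs only on the Foo-is-true path
//   }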
3650
3651 void MacroAssembler::addptr(const Address &dst, int32_t src) {
3652 Address adr;
3653 switch(dst.getMode()) {
3654 case Address::base_plus_offset:
3655 // This is the expected mode, although we allow all the other
3656 // forms below.
3657 adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord);
3658 break;
3659 default:
3660 lea(rscratch2, dst);
3661 adr = Address(rscratch2);
3662 break;
3663 }
3664 ldr(rscratch1, adr);
3665 add(rscratch1, rscratch1, src);
3666 str(rscratch1, adr);
3667 }
3668
3669 void MacroAssembler::cmpptr(Register src1, Address src2) {
3670 uint64_t offset;
3671 adrp(rscratch1, src2, offset);
3672 ldr(rscratch1, Address(rscratch1, offset));
3673 cmp(src1, rscratch1);
3674 }
3675
3676 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
3677 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3678 bs->obj_equals(this, obj1, obj2);
3679 }
3680
3681 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
3682 load_method_holder(rresult, rmethod);
3683 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
3684 }
3685
3686 void MacroAssembler::load_method_holder(Register holder, Register method) {
3687 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
3688 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
3689 ldr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
3690 }
4312 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
4313 ldr(dest, Address(rthread, Thread::polling_page_offset()));
4314 }
4315
4316 // Move the address of the polling page into r, then read the polling
4317 // page.
4318 address MacroAssembler::fetch_and_read_polling_page(Register r, relocInfo::relocType rtype) {
4319 get_polling_page(r, rtype);
4320 return read_polling_page(r, rtype);
4321 }
4322
4323 // Read the polling page. The address of the polling page must
4324 // already be in r.
4325 address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
4326 InstructionMark im(this);
4327 code_section()->relocate(inst_mark(), rtype);
4328 ldrw(zr, Address(r, 0));
4329 return inst_mark();
4330 }
4331
4332 void MacroAssembler::adrp(Register reg1, const Address &dest, uint64_t &byte_offset) {
4333 relocInfo::relocType rtype = dest.rspec().reloc()->type();
4334 uint64_t low_page = (uint64_t)CodeCache::low_bound() >> 12;
4335 uint64_t high_page = (uint64_t)(CodeCache::high_bound()-1) >> 12;
4336 uint64_t dest_page = (uint64_t)dest.target() >> 12;
4337 int64_t offset_low = dest_page - low_page;
4338 int64_t offset_high = dest_page - high_page;
4339
4340 assert(is_valid_AArch64_address(dest.target()), "bad address");
4341 assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
4342
4343 InstructionMark im(this);
4344 code_section()->relocate(inst_mark(), dest.rspec());
4345 // 8143067: Ensure that the adrp can reach the dest from anywhere within
4346 // the code cache so that if it is relocated we know it will still reach
4347 if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
4348 _adrp(reg1, dest.target());
4349 } else {
4350 uint64_t target = (uint64_t)dest.target();
4351 uint64_t adrp_target
4352 = (target & 0xffffffffUL) | ((uint64_t)pc() & 0xffff00000000UL);
4353
4354 _adrp(reg1, (address)adrp_target);
4355 movk(reg1, target >> 32, 32);
4356 }
4357 byte_offset = (uint64_t)dest.target() & 0xfff;
4358 }
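// e.g. with pc == 0x7f0012345000 and dest == 0x7e0087654000 the pages are
// far more than 2^20 apart (beyond ADRP's +/-4GB reach), so we adrp to
// 0x7f0087654000, the page sharing the target's low 32 bits, and
// movk #0x7e00, lsl #32 restores the high bits; byte_offset comes back 0.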
4359
4360 void MacroAssembler::load_byte_map_base(Register reg) {
4361 CardTable::CardValue* byte_map_base =
4362 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
4363
4364 if (is_valid_AArch64_address((address)byte_map_base)) {
4365 // Strictly speaking the byte_map_base isn't an address at all,
4366 // and it might even be negative.
4367 uint64_t offset;
4368 adrp(reg, ExternalAddress((address)byte_map_base), offset);
4369 // We expect offset to be zero with most collectors.
4370 if (offset != 0) {
4371 add(reg, reg, offset);
4372 }
4373 } else {
4374 mov(reg, (uint64_t)byte_map_base);
4375 }
4376 }
4377
4378 void MacroAssembler::build_frame(int framesize) {
4379 assert(framesize > 0, "framesize must be > 0");
4380 if (framesize < ((1 << 9) + 2 * wordSize)) {
4381 sub(sp, sp, framesize);
4382 stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4383 if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
4384 } else {
4385 stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
4386 if (PreserveFramePointer) mov(rfp, sp);
4387 if (framesize < ((1 << 12) + 2 * wordSize))
4392 }
4393 }
4394 }
4395
4396 void MacroAssembler::remove_frame(int framesize) {
4397 assert(framesize > 0, "framesize must be > 0");
4398 if (framesize < ((1 << 9) + 2 * wordSize)) {
4399 ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4400 add(sp, sp, framesize);
4401 } else {
4402 if (framesize < ((1 << 12) + 2 * wordSize))
4403 add(sp, sp, framesize - 2 * wordSize);
4404 else {
4405 mov(rscratch1, framesize - 2 * wordSize);
4406 add(sp, sp, rscratch1);
4407 }
4408 ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
4409 }
4410 }
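// e.g. for framesize == 64 (the small case) build_frame emits
//   sub sp, sp, #64
//   stp rfp, lr, [sp, #48]
// and remove_frame mirrors it with
//   ldp rfp, lr, [sp, #48]
//   add sp, sp, #64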
4411
4412
4413 // This method checks whether the provided byte array contains a byte with the highest bit set.
4414 void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
4415 // The simple and most common case, a small aligned array that is not at
4416 // the end of a memory page, is handled here. All other cases are in the stub.
4417 Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
4418 const uint64_t UPPER_BIT_MASK=0x8080808080808080;
4419 assert_different_registers(ary1, len, result);
4420
4421 cmpw(len, 0);
4422 br(LE, SET_RESULT);
4423 cmpw(len, 4 * wordSize);
4424 br(GE, STUB_LONG); // size > 32 then go to stub
4425
4426 int shift = 64 - exact_log2(os::vm_page_size());
4427 lsl(rscratch1, ary1, shift);
4428 mov(rscratch2, (size_t)(4 * wordSize) << shift);
4429 adds(rscratch2, rscratch1, rscratch2); // At end of page?
4430 br(CS, STUB); // at the end of page then go to stub
4431 subs(len, len, wordSize);
4432 br(LT, END);
4790 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
4791 Label l;
4792 tbz(cnt, exact_log2(i), l);
4793 for (int j = 0; j < i; j += 2) {
4794 stp(zr, zr, post(ptr, 16));
4795 }
4796 bind(l);
4797 }
4798 {
4799 Label l;
4800 tbz(cnt, 0, l);
4801 str(zr, Address(ptr));
4802 bind(l);
4803 }
4804 BLOCK_COMMENT("} zero_words");
4805 }
4806
4807 // base: Address of a buffer to be zeroed, 8 bytes aligned.
4808 // cnt: Immediate count in HeapWords.
4809 #define SmallArraySize (18 * BytesPerLong)
4810 void MacroAssembler::zero_words(Register base, uint64_t cnt)
4811 {
4812 BLOCK_COMMENT("zero_words {");
4813 int i = cnt & 1; // store any odd word to start
4814 if (i) str(zr, Address(base));
4815
4816 if (cnt <= SmallArraySize / BytesPerLong) {
4817 for (; i < (int)cnt; i += 2)
4818 stp(zr, zr, Address(base, i * wordSize));
4819 } else {
4820 const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
4821 int remainder = cnt % (2 * unroll);
4822 for (; i < remainder; i += 2)
4823 stp(zr, zr, Address(base, i * wordSize));
4824
4825 Label loop;
4826 Register cnt_reg = rscratch1;
4827 Register loop_base = rscratch2;
4828 cnt = cnt - remainder;
4829 mov(cnt_reg, cnt);
4830 // adjust base and prebias by -2 * wordSize so we can pre-increment
|
53 #endif
54 #ifdef COMPILER2
55 #include "oops/oop.hpp"
56 #include "opto/compile.hpp"
57 #include "opto/node.hpp"
58 #include "opto/output.hpp"
59 #endif
60
61 #ifdef PRODUCT
62 #define BLOCK_COMMENT(str) /* nothing */
63 #else
64 #define BLOCK_COMMENT(str) block_comment(str)
65 #endif
66 #define STOP(str) stop(str);
67 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
68
69 // Patch any kind of instruction; there may be several instructions.
70 // Return the total length (in bytes) of the instructions.
71 int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
72 int instructions = 1;
73 assert((uint64_t)target < ((uint64_t)1 << 48), "48-bit overflow in address constant");
74 int64_t offset = (target - branch) >> 2;
75 unsigned insn = *(unsigned*)branch;
76 if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
77 // Load register (literal)
78 Instruction_aarch64::spatch(branch, 23, 5, offset);
79 } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
80 // Unconditional branch (immediate)
81 Instruction_aarch64::spatch(branch, 25, 0, offset);
82 } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
83 // Conditional branch (immediate)
84 Instruction_aarch64::spatch(branch, 23, 5, offset);
85 } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
86 // Compare & branch (immediate)
87 Instruction_aarch64::spatch(branch, 23, 5, offset);
88 } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
89 // Test & branch (immediate)
90 Instruction_aarch64::spatch(branch, 18, 5, offset);
91 } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
92 // PC-rel. addressing
93 offset = target-branch;
94 int shift = Instruction_aarch64::extract(insn, 31, 31);
95 if (shift) {
96 uint64_t dest = (uint64_t)target;
97 uint64_t pc_page = (uint64_t)branch >> 12;
98 uint64_t adr_page = (uint64_t)target >> 12;
99 unsigned offset_lo = dest & 0xfff;
100 offset = adr_page - pc_page;
101
102 // We handle 4 types of PC relative addressing
103 // 1 - adrp Rx, target_page
104 // ldr/str Ry, [Rx, #offset_in_page]
105 // 2 - adrp Rx, target_page
106 // add Ry, Rx, #offset_in_page
107 // 3 - adrp Rx, target_page (page aligned reloc, offset == 0)
108 // movk Rx, #imm16<<32
109 // 4 - adrp Rx, target_page (page aligned reloc, offset == 0)
110 // In the first 3 cases we must check that Rx is the same in the adrp and the
111 // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end
112 // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened
113 // to be followed by a random unrelated ldr/str, add or movk instruction.
114 //
115 unsigned insn2 = ((unsigned*)branch)[1];
116 if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
117 Instruction_aarch64::extract(insn, 4, 0) ==
118 Instruction_aarch64::extract(insn2, 9, 5)) {
119 // Load/store register (unsigned immediate)
120 unsigned size = Instruction_aarch64::extract(insn2, 31, 30);
121 Instruction_aarch64::patch(branch + sizeof (unsigned),
122 21, 10, offset_lo >> size);
123 guarantee(((dest >> size) << size) == dest, "misaligned target");
124 instructions = 2;
125 } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
126 Instruction_aarch64::extract(insn, 4, 0) ==
127 Instruction_aarch64::extract(insn2, 4, 0)) {
128 // add (immediate)
129 Instruction_aarch64::patch(branch + sizeof (unsigned),
130 21, 10, offset_lo);
131 instructions = 2;
132 } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
133 Instruction_aarch64::extract(insn, 4, 0) ==
134 Instruction_aarch64::extract(insn2, 4, 0)) {
135 // movk #imm16<<32
136 Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
137 int64_t dest = ((int64_t)target & 0xffffffffL) | ((int64_t)branch & 0xffff00000000L);
138 int64_t pc_page = (int64_t)branch >> 12;
139 int64_t adr_page = (int64_t)dest >> 12;
140 offset = adr_page - pc_page;
141 instructions = 2;
142 }
143 }
144 int offset_lo = offset & 3;
145 offset >>= 2;
146 Instruction_aarch64::spatch(branch, 23, 5, offset);
147 Instruction_aarch64::patch(branch, 30, 29, offset_lo);
148 } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
149 uint64_t dest = (uint64_t)target;
150 // Move wide constant
151 assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
152 assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
153 Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
154 Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
155 Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
156 assert(target_addr_for_insn(branch) == target, "should be");
157 instructions = 3;
158 } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
159 Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
160 // nothing to do
161 assert(target == 0, "did not expect to relocate target for polling page load");
162 } else {
163 ShouldNotReachHere();
164 }
165 return instructions * NativeInstruction::instruction_size;
166 }
167
168 int MacroAssembler::patch_oop(address insn_addr, address o) {
169 int instructions;
188 Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
189 instructions = 3;
190 }
191 return instructions * NativeInstruction::instruction_size;
192 }
193
194 int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
195 // Metatdata pointers are either narrow (32 bits) or wide (48 bits).
196 // We encode narrow ones by setting the upper 16 bits in the first
197 // instruction.
198 NativeInstruction *insn = nativeInstruction_at(insn_addr);
199 assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
200 nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
201
202 Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
203 Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
204 return 2 * NativeInstruction::instruction_size;
205 }
206
207 address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
208 int64_t offset = 0;
209 if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
210 // Load register (literal)
211 offset = Instruction_aarch64::sextract(insn, 23, 5);
212 return address(((uint64_t)insn_addr + (offset << 2)));
213 } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
214 // Unconditional branch (immediate)
215 offset = Instruction_aarch64::sextract(insn, 25, 0);
216 } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
217 // Conditional branch (immediate)
218 offset = Instruction_aarch64::sextract(insn, 23, 5);
219 } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
220 // Compare & branch (immediate)
221 offset = Instruction_aarch64::sextract(insn, 23, 5);
222 } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
223 // Test & branch (immediate)
224 offset = Instruction_aarch64::sextract(insn, 18, 5);
225 } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
226 // PC-rel. addressing
227 offset = Instruction_aarch64::extract(insn, 30, 29);
228 offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2;
255 return address(target_page + (byte_offset << size));
256 } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
257 Instruction_aarch64::extract(insn, 4, 0) ==
258 Instruction_aarch64::extract(insn2, 4, 0)) {
259 // add (immediate)
260 unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
261 return address(target_page + byte_offset);
262 } else {
263 if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
264 Instruction_aarch64::extract(insn, 4, 0) ==
265 Instruction_aarch64::extract(insn2, 4, 0)) {
266 target_page = (target_page & 0xffffffff) |
267 ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32);
268 }
269 return (address)target_page;
270 }
271 } else {
272 ShouldNotReachHere();
273 }
274 } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
275 uint32_t *insns = (uint32_t *)insn_addr;
276 // Move wide constant: movz, movk, movk. See movptr().
277 assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
278 assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
279 return address(uint64_t(Instruction_aarch64::extract(insns[0], 20, 5))
280 + (uint64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
281 + (uint64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
282 } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
283 Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
284 return 0;
285 } else {
286 ShouldNotReachHere();
287 }
288 return address(((uint64_t)insn_addr + (offset << 2)));
289 }
290
291 void MacroAssembler::safepoint_poll(Label& slow_path) {
292 ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));
293 tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
294 }
295
296 // Just like safepoint_poll, but use an acquiring load for thread-
297 // local polling.
298 //
299 // We need an acquire here to ensure that any subsequent load of the
300 // global SafepointSynchronize::_state flag is ordered after this load
301 // of the local Thread::_polling page. We don't want this poll to
372 }
373
374 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
375 Register last_java_fp,
376 Label &L,
377 Register scratch) {
378 if (L.is_bound()) {
379 set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch);
380 } else {
381 InstructionMark im(this);
382 L.add_patch_at(code(), locator());
383 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, scratch);
384 }
385 }
386
387 void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
388 assert(ReservedCodeCacheSize < 4*G, "branch out of range");
389 assert(CodeCache::find_blob(entry.target()) != NULL,
390 "destination of far call not found in code cache");
391 if (far_branches()) {
392 uint64_t offset;
393 // We can use ADRP here because we know that the total size of
394 // the code cache cannot exceed 2Gb.
395 adrp(tmp, entry, offset);
396 add(tmp, tmp, offset);
397 if (cbuf) cbuf->set_insts_mark();
398 blr(tmp);
399 } else {
400 if (cbuf) cbuf->set_insts_mark();
401 bl(entry);
402 }
403 }
404
405 void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
406 assert(ReservedCodeCacheSize < 4*G, "branch out of range");
407 assert(CodeCache::find_blob(entry.target()) != NULL,
408 "destination of far call not found in code cache");
409 if (far_branches()) {
410 uint64_t offset;
411 // We can use ADRP here because we know that the total size of
412 // the code cache cannot exceed 2Gb.
413 adrp(tmp, entry, offset);
414 add(tmp, tmp, offset);
415 if (cbuf) cbuf->set_insts_mark();
416 br(tmp);
417 } else {
418 if (cbuf) cbuf->set_insts_mark();
419 b(entry);
420 }
421 }
422
423 void MacroAssembler::reserved_stack_check() {
424 // testing if reserved zone needs to be enabled
425 Label no_reserved_zone_enabling;
426
427 ldr(rscratch1, Address(rthread, JavaThread::reserved_stack_activation_offset()));
428 cmp(sp, rscratch1);
429 br(Assembler::LO, no_reserved_zone_enabling);
430
807 isb();
808 mov_metadata(rmethod, (Metadata*)NULL);
809
810 // Jump to the entry point of the i2c stub.
811 movptr(rscratch1, 0);
812 br(rscratch1);
813 }
814
815 void MacroAssembler::c2bool(Register x) {
816 // implements x == 0 ? 0 : 1
817 // note: must only look at least-significant byte of x
818 // since C-style booleans are stored in one byte
819 // only! (was bug)
820 tst(x, 0xff);
821 cset(x, Assembler::NE);
822 }
823
824 address MacroAssembler::ic_call(address entry, jint method_index) {
825 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
826 // address const_ptr = long_constant((jlong)Universe::non_oop_word());
827 // uint64_t offset;
828 // ldr_constant(rscratch2, const_ptr);
829 movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
830 return trampoline_call(Address(entry, rh));
831 }
832
833 // Implementation of call_VM versions
834
835 void MacroAssembler::call_VM(Register oop_result,
836 address entry_point,
837 bool check_exceptions) {
838 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
839 }
840
841 void MacroAssembler::call_VM(Register oop_result,
842 address entry_point,
843 Register arg_1,
844 bool check_exceptions) {
845 pass_arg1(this, arg_1);
846 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
847 }
1474 MacroAssembler::call_VM_leaf_base(entry_point, 4);
1475 }
1476
1477 void MacroAssembler::null_check(Register reg, int offset) {
1478 if (needs_explicit_null_check(offset)) {
1479 // provoke OS NULL exception if reg = NULL by
1480 // accessing M[reg] w/o changing any registers
1481 // NOTE: this is plenty to provoke a segv
1482 ldr(zr, Address(reg));
1483 } else {
1484 // nothing to do, (later) access of M[reg + offset]
1485 // will provoke OS NULL exception if reg = NULL
1486 }
1487 }
1488
1489 // MacroAssembler protected routines needed to implement
1490 // public methods
1491
1492 void MacroAssembler::mov(Register r, Address dest) {
1493 code_section()->relocate(pc(), dest.rspec());
1494 uint64_t imm64 = (uint64_t)dest.target();
1495 movptr(r, imm64);
1496 }
1497
1498 // Move a constant pointer into r. In AArch64 mode the virtual
1499 // address space is 48 bits in size, so we only need three
1500 // instructions to create a patchable instruction sequence that can
1501 // reach anywhere.
1502 void MacroAssembler::movptr(Register r, uintptr_t imm64) {
1503 #ifndef PRODUCT
1504 {
1505 char buffer[64];
1506 snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
1507 block_comment(buffer);
1508 }
1509 #endif
1510 assert(imm64 < (1ull << 48), "48-bit overflow in address constant");
1511 movz(r, imm64 & 0xffff);
1512 imm64 >>= 16;
1513 movk(r, imm64 & 0xffff, 16);
1514 imm64 >>= 16;
1515 movk(r, imm64 & 0xffff, 32);
1516 }
1517
1518 // Macro to mov replicated immediate to vector register.
1519 // Vd will get the following values for different arrangements in T
1520 // imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh
1521 // imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
1522 // imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh
1523 // imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
1524 // imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
1525 // imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
1526 // T1D/T2D: invalid
1527 void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32) {
1528 assert(T != T1D && T != T2D, "invalid arrangement");
1529 if (T == T8B || T == T16B) {
1530 assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
1531 movi(Vd, T, imm32 & 0xff, 0);
1532 return;
1533 }
1534 uint32_t nimm32 = ~imm32;
1535 if (T == T4H || T == T8H) {
1536 assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
1537 imm32 &= 0xffff;
1538 nimm32 &= 0xffff;
1539 }
1540 uint32_t x = imm32;
1541 int movi_cnt = 0;
1542 int movn_cnt = 0;
1543 while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
1544 x = nimm32;
1545 while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
1546 if (movn_cnt < movi_cnt) imm32 = nimm32;
1547 unsigned lsl = 0;
1548 while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1549 if (movn_cnt < movi_cnt)
1550 mvni(Vd, T, imm32 & 0xff, lsl);
1551 else
1552 movi(Vd, T, imm32 & 0xff, lsl);
1553 imm32 >>= 8; lsl += 8;
1554 while (imm32) {
1555 while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1556 if (movn_cnt < movi_cnt)
1557 bici(Vd, T, imm32 & 0xff, lsl);
1558 else
1559 orri(Vd, T, imm32 & 0xff, lsl);
1560 lsl += 8; imm32 >>= 8;
1561 }
1562 }
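// Illustrative expansions (hypothetical values):
//   mov(v0, T4S, 0x00ff00ff)  ->  movi v0.4s, #0xff
//                                 orr  v0.4s, #0xff, lsl #16
//   mov(v0, T4S, 0xffffff00)  ->  mvni v0.4s, #0xff
// The second case inverts: ~0xffffff00 == 0x000000ff has only one non-zero
// byte, so a single MVNI suffices.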
1563
1564 void MacroAssembler::mov_immediate64(Register dst, uint64_t imm64)
1565 {
1566 #ifndef PRODUCT
1567 {
1568 char buffer[64];
1569 snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
1570 block_comment(buffer);
1571 }
1572 #endif
1573 if (operand_valid_for_logical_immediate(false, imm64)) {
1574 orr(dst, zr, imm64);
1575 } else {
1576 // we can use a combination of MOVZ or MOVN with
1577 // MOVK to build up the constant
1578 uint64_t imm_h[4];
1579 int zero_count = 0;
1580 int neg_count = 0;
1581 int i;
1582 for (i = 0; i < 4; i++) {
1583 imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
1584 if (imm_h[i] == 0) {
1585 zero_count++;
1586 } else if (imm_h[i] == 0xffffL) {
1587 neg_count++;
1588 }
1589 }
1590 if (zero_count == 4) {
1591 // one MOVZ will do
1592 movz(dst, 0);
1593 } else if (neg_count == 4) {
1594 // one MOVN will do
1595 movn(dst, 0);
1596 } else if (zero_count == 3) {
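// one MOVZ will do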
1597 for (i = 0; i < 4; i++) {
1598 if (imm_h[i] != 0L) {
1599 movz(dst, (uint32_t)imm_h[i], (i << 4));
1600 break;
1601 }
1602 }
1603 } else if (neg_count == 3) {
1604 // one MOVN will do
1605 for (int i = 0; i < 4; i++) {
1606 if (imm_h[i] != 0xffffL) {
1607 movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1608 break;
1609 }
1610 }
1611 } else if (zero_count == 2) {
1612 // one MOVZ and one MOVK will do
1613 for (i = 0; i < 3; i++) {
1614 if (imm_h[i] != 0L) {
1615 movz(dst, (uint32_t)imm_h[i], (i << 4));
1616 i++;
1617 break;
1618 }
1619 }
1620 for (;i < 4; i++) {
1621 if (imm_h[i] != 0L) {
1622 movk(dst, (uint32_t)imm_h[i], (i << 4));
1623 }
1624 }
1625 } else if (neg_count == 2) {
1626 // one MOVN and one MOVK will do
1627 for (i = 0; i < 4; i++) {
1628 if (imm_h[i] != 0xffffL) {
1629 movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1630 i++;
1631 break;
1632 }
1633 }
1634 for (;i < 4; i++) {
1635 if (imm_h[i] != 0xffffL) {
1636 movk(dst, (uint32_t)imm_h[i], (i << 4));
1637 }
1638 }
1639 } else if (zero_count == 1) {
1640 // one MOVZ and two MOVKs will do
1641 for (i = 0; i < 4; i++) {
1642 if (imm_h[i] != 0L) {
1643 movz(dst, (uint32_t)imm_h[i], (i << 4));
1644 i++;
1645 break;
1646 }
1647 }
1648 for (;i < 4; i++) {
1649 if (imm_h[i] != 0L) {
1650 movk(dst, (uint32_t)imm_h[i], (i << 4));
1651 }
1652 }
1653 } else if (neg_count == 1) {
1654 // one MOVN and two MOVKs will do
1655 for (i = 0; i < 4; i++) {
1656 if (imm_h[i] != 0xffffL) {
1657 movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
1658 i++;
1659 break;
1660 }
1661 }
1662 for (;i < 4; i++) {
1663 if (imm_h[i] != 0xffffL) {
1664 movk(dst, (uint32_t)imm_h[i], (i << 4));
1665 }
1666 }
1667 } else {
1668 // use a MOVZ and 3 MOVKs (makes it easier to debug)
1669 movz(dst, (uint32_t)imm_h[0], 0);
1670 for (i = 1; i < 4; i++) {
1671 movk(dst, (uint32_t)imm_h[i], (i << 4));
1672 }
1673 }
1674 }
1675 }
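// Example decomposition (hypothetical value): imm64 == 0x0000123400005678 has
// halfwords {0x5678, 0x0000, 0x1234, 0x0000}, so zero_count == 2 and we emit
//   movz dst, #0x5678
//   movk dst, #0x1234, lsl #32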
1676
1677 void MacroAssembler::mov_immediate32(Register dst, uint32_t imm32)
1678 {
1679 #ifndef PRODUCT
1680 {
1681 char buffer[64];
1682 snprintf(buffer, sizeof(buffer), "0x%" PRIX32, imm32);
1683 block_comment(buffer);
1684 }
1685 #endif
1686 if (operand_valid_for_logical_immediate(true, imm32)) {
1687 orrw(dst, zr, imm32);
1688 } else {
1689 // we can use a single MOVZ or MOVN, or a MOVZ/MOVK pair, to build
1690 // up the constant
1691 uint32_t imm_h[2];
1692 imm_h[0] = imm32 & 0xffff;
1693 imm_h[1] = ((imm32 >> 16) & 0xffff);
1694 if (imm_h[0] == 0) {
1695 movzw(dst, imm_h[1], 16);
1696 } else if (imm_h[0] == 0xffff) {
1697 movnw(dst, imm_h[1] ^ 0xffff, 16);
1698 } else if (imm_h[1] == 0) {
1699 movzw(dst, imm_h[0], 0);
1700 } else if (imm_h[1] == 0xffff) {
1701 movnw(dst, imm_h[0] ^ 0xffff, 0);
1702 } else {
1703 // use a MOVZ and MOVK (makes it easier to debug)
1704 movzw(dst, imm_h[0], 0);
1705 movkw(dst, imm_h[1], 16);
1706 }
1707 }
1708 }
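// Example (hypothetical value): imm32 == 0xffff1234 has imm_h[1] == 0xffff, so
// a single movnw(dst, 0x1234 ^ 0xffff, 0) suffices: MOVN writes the bitwise
// NOT of 0x0000edcb, i.e. 0xffff1234.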
1709
1710 // Form an address from base + offset in Rd. Rd may or may
1711 // not actually be used: you must use the Address that is returned.
1712 // It is up to you to ensure that the shift provided matches the size
1713 // of your data.
1714 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset, int shift) {
1715 if (Address::offset_ok_for_immed(byte_offset, shift))
1716 // It fits; no need for any heroics
1717 return Address(base, byte_offset);
1718
1719 // Don't do anything clever with negative or misaligned offsets
1720 unsigned mask = (1 << shift) - 1;
1721 if (byte_offset < 0 || byte_offset & mask) {
1722 mov(Rd, byte_offset);
1723 add(Rd, base, Rd);
1724 return Address(Rd);
1725 }
1726
1727 // See if we can do this with two 12-bit offsets
1728 {
1729 uint64_t word_offset = byte_offset >> shift;
1730 uint64_t masked_offset = word_offset & 0xfff000;
1731 if (Address::offset_ok_for_immed(word_offset - masked_offset, 0)
1732 && Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
1733 add(Rd, base, masked_offset << shift);
1734 word_offset -= masked_offset;
1735 return Address(Rd, word_offset << shift);
1736 }
1737 }
1738
1739 // Do it the hard way
1740 mov(Rd, byte_offset);
1741 add(Rd, base, Rd);
1742 return Address(Rd);
1743 }
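// Example of the two-instruction split (hypothetical values): for
// byte_offset == 0x123000 with shift == 3, the offset is too large for a
// scaled 12-bit immediate, but splits as
//   add Rd, base, #0x120000    // 12-bit immediate, LSL #12
// returning Address(Rd, 0x3000), where 0x3000 >> 3 fits the scaled immediate.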
1744
1745 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
1746 if (UseLSE) {
1747 mov(tmp, 1);
1748 ldadd(Assembler::word, tmp, zr, counter_addr);
1749 return;
1750 }
1951
1952 void MacroAssembler::decrementw(Register reg, int value)
1953 {
1954 if (value < 0) { incrementw(reg, -value); return; }
1955 if (value == 0) { return; }
1956 if (value < (1 << 12)) { subw(reg, reg, value); return; }
1957 /* else */ {
1958 guarantee(reg != rscratch2, "invalid dst for register decrement");
1959 movw(rscratch2, (unsigned)value);
1960 subw(reg, reg, rscratch2);
1961 }
1962 }
1963
1964 void MacroAssembler::decrement(Register reg, int value)
1965 {
1966 if (value < 0) { increment(reg, -value); return; }
1967 if (value == 0) { return; }
1968 if (value < (1 << 12)) { sub(reg, reg, value); return; }
1969 /* else */ {
1970 assert(reg != rscratch2, "invalid dst for register decrement");
1971 mov(rscratch2, (uint64_t) value);
1972 sub(reg, reg, rscratch2);
1973 }
1974 }
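// e.g. decrement(r0, 16) emits a single "sub x0, x0, #16", while
// decrement(r0, 0x12345) exceeds the 12-bit immediate range and becomes
//   mov rscratch2, #0x12345
//   sub x0, x0, rscratch2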
1975
1976 void MacroAssembler::decrementw(Address dst, int value)
1977 {
1978 assert(!dst.uses(rscratch1), "invalid dst for address decrement");
1979 if (dst.getMode() == Address::literal) {
1980 assert(abs(value) < (1 << 12), "invalid value and address mode combination");
1981 lea(rscratch2, dst);
1982 dst = Address(rscratch2);
1983 }
1984 ldrw(rscratch1, dst);
1985 decrementw(rscratch1, value);
1986 strw(rscratch1, dst);
1987 }
1988
1989 void MacroAssembler::decrement(Address dst, int value)
1990 {
1991 assert(!dst.uses(rscratch1), "invalid address for decrement");
2583 tty->print_cr("r19 = 0x%016lx", regs[19]);
2584 tty->print_cr("r20 = 0x%016lx", regs[20]);
2585 tty->print_cr("r21 = 0x%016lx", regs[21]);
2586 tty->print_cr("r22 = 0x%016lx", regs[22]);
2587 tty->print_cr("r23 = 0x%016lx", regs[23]);
2588 tty->print_cr("r24 = 0x%016lx", regs[24]);
2589 tty->print_cr("r25 = 0x%016lx", regs[25]);
2590 tty->print_cr("r26 = 0x%016lx", regs[26]);
2591 tty->print_cr("r27 = 0x%016lx", regs[27]);
2592 tty->print_cr("r28 = 0x%016lx", regs[28]);
2593 tty->print_cr("r30 = 0x%016lx", regs[30]);
2594 tty->print_cr("r31 = 0x%016lx", regs[31]);
2595 BREAKPOINT;
2596 }
2597 }
2598 fatal("DEBUG MESSAGE: %s", msg);
2599 }
2600
2601 void MacroAssembler::push_call_clobbered_registers() {
2602 int step = 4 * wordSize;
2603 push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) WIN64_ONLY(- r18), sp);
2604 sub(sp, sp, step);
2605 mov(rscratch1, -step);
2606 // Push v0-v7, v16-v31.
2607 for (int i = 31; i>= 4; i -= 4) {
2608 if (i <= v7->encoding() || i >= v16->encoding())
2609 st1(as_FloatRegister(i-3), as_FloatRegister(i-2), as_FloatRegister(i-1),
2610 as_FloatRegister(i), T1D, Address(post(sp, rscratch1)));
2611 }
2612 st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2),
2613 as_FloatRegister(3), T1D, Address(sp));
2614 }
2615
2616 void MacroAssembler::pop_call_clobbered_registers() {
2617 for (int i = 0; i < 32; i += 4) {
2618 if (i <= v7->encoding() || i >= v16->encoding())
2619 ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2620 as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
2621 }
2622 pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) WIN64_ONLY(- r18), sp);
2623 }
2624
2625 void MacroAssembler::push_CPU_state(bool save_vectors) {
2626 int step = (save_vectors ? 8 : 4) * wordSize;
2627 push(0x3fffffff, sp); // integer registers except lr & sp
2628 mov(rscratch1, -step);
2629 sub(sp, sp, step);
2630 for (int i = 28; i >= 4; i -= 4) {
2631 st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2632 as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
2633 }
2634 st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
2635 }
2636
2637 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
2638 int step = (restore_vectors ? 8 : 4) * wordSize;
2639 for (int i = 0; i <= 28; i += 4)
2640 ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
2641 as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
2642 pop(0x3fffffff, sp); // integer registers except lr & sp
2674 Register base = sp;
2675 if ((offset & (size-1)) && offset >= (1<<8)) {
2676 add(tmp, base, offset & ((1<<12)-1));
2677 base = tmp;
2678 offset &= -1u<<12;
2679 }
2680
2681 if (offset >= (1<<12) * size) {
2682 add(tmp, base, offset & (((1<<12)-1)<<12));
2683 base = tmp;
2684 offset &= ~(((1<<12)-1)<<12);
2685 }
2686
2687 return Address(base, offset);
2688 }
2689
2690 // Checks whether the load/store offsets are suitably aligned for merging
2691 // into a pair access. Returns true if they are, else false.
2692 bool MacroAssembler::merge_alignment_check(Register base,
2693 size_t size,
2694 int64_t cur_offset,
2695 int64_t prev_offset) const {
2696 if (AvoidUnalignedAccesses) {
2697 if (base == sp) {
2698 // Checks whether the low offset is aligned to a register pair.
2699 int64_t pair_mask = size * 2 - 1;
2700 int64_t offset = prev_offset > cur_offset ? cur_offset : prev_offset;
2701 return (offset & pair_mask) == 0;
2702 } else { // If base is not sp, we can't guarantee the access is aligned.
2703 return false;
2704 }
2705 } else {
2706 int64_t mask = size - 1;
2707 // Load/store pair instruction only supports element size aligned offset.
2708 return (cur_offset & mask) == 0 && (prev_offset & mask) == 0;
2709 }
2710 }
2711
2712 // Checks whether the current and previous loads/stores can be merged.
2713 // Returns true if they can, else false.
2714 bool MacroAssembler::ldst_can_merge(Register rt,
2715 const Address &adr,
2716 size_t cur_size_in_bytes,
2717 bool is_store) const {
2718 address prev = pc() - NativeInstruction::instruction_size;
2719 address last = code()->last_insn();
2720
2721 if (last == NULL || !nativeInstruction_at(last)->is_Imm_LdSt()) {
2722 return false;
2723 }
2724
2725 if (adr.getMode() != Address::base_plus_offset || prev != last) {
2726 return false;
2727 }
2728
2729 NativeLdSt* prev_ldst = NativeLdSt_at(prev);
2730 size_t prev_size_in_bytes = prev_ldst->size_in_bytes();
2731
2732 assert(prev_size_in_bytes == 4 || prev_size_in_bytes == 8, "only supports 64/32bit merging.");
2733 assert(cur_size_in_bytes == 4 || cur_size_in_bytes == 8, "only supports 64/32bit merging.");
2734
2735 if (cur_size_in_bytes != prev_size_in_bytes || is_store != prev_ldst->is_store()) {
2736 return false;
2737 }
2738
2739 int64_t max_offset = 63 * prev_size_in_bytes;
2740 int64_t min_offset = -64 * prev_size_in_bytes;
2741
2742 assert(prev_ldst->is_not_pre_post_index(), "merging of pre-index or post-index accesses is not supported.");
2743
2744 // Only same base can be merged.
2745 if (adr.base() != prev_ldst->base()) {
2746 return false;
2747 }
2748
2749 int64_t cur_offset = adr.offset();
2750 int64_t prev_offset = prev_ldst->offset();
2751 size_t diff = abs(cur_offset - prev_offset);
2752 if (diff != prev_size_in_bytes) {
2753 return false;
2754 }
2755
2756 // The following cases cannot be merged:
2757 // ldr x2, [x2, #8]
2758 // ldr x3, [x2, #16]
2759 // or:
2760 // ldr x2, [x3, #8]
2761 // ldr x2, [x3, #16]
2762 // If t1 and t2 are the same in "ldp t1, t2, [xn, #imm]", we'll get a SIGILL.
2763 if (!is_store && (adr.base() == prev_ldst->target() || rt == prev_ldst->target())) {
2764 return false;
2765 }
2766
2767 int64_t low_offset = prev_offset > cur_offset ? cur_offset : prev_offset;
2768 // The low offset must be within the ldp/stp immediate offset range.
2769 if (low_offset > max_offset || low_offset < min_offset) {
2770 return false;
2771 }
2772
2773 if (merge_alignment_check(adr.base(), prev_size_in_bytes, cur_offset, prev_offset)) {
2774 return true;
2775 }
2776
2777 return false;
2778 }
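// For instance, "ldr w1, [sp, #8]" followed by "ldr w2, [sp, #12]" passes all
// of the checks above (same base, same 4-byte size, adjacent offsets,
// pair-aligned low offset) and can become "ldp w1, w2, [sp, #8]". The same
// pair based on a register other than sp is rejected by merge_alignment_check
// when AvoidUnalignedAccesses is set.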
2779
2780 // Merge current load/store with previous load/store into ldp/stp.
2781 void MacroAssembler::merge_ldst(Register rt,
2782 const Address &adr,
2783 size_t cur_size_in_bytes,
2784 bool is_store) {
2785
2786 assert(ldst_can_merge(rt, adr, cur_size_in_bytes, is_store), "cur and prev must be mergeable.");
2787
2788 Register rt_low, rt_high;
2789 address prev = pc() - NativeInstruction::instruction_size;
2790 NativeLdSt* prev_ldst = NativeLdSt_at(prev);
2791
2792 int64_t offset;
2793
2794 if (adr.offset() < prev_ldst->offset()) {
2795 offset = adr.offset();
2796 rt_low = rt;
2797 rt_high = prev_ldst->target();
2798 } else {
2799 offset = prev_ldst->offset();
2800 rt_low = prev_ldst->target();
2801 rt_high = rt;
2802 }
2803
2804 Address adr_p = Address(prev_ldst->base(), offset);
2805 // Overwrite previous generated binary.
2806 code_section()->set_end(prev);
2807
2808 const int sz = prev_ldst->size_in_bytes();
2809 assert(sz == 8 || sz == 4, "only supports 64/32bit merging.");
2810 if (!is_store) {
2811 BLOCK_COMMENT("merged ldr pair");
2812 if (sz == 8) {
3019 *
3020 */
3021 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3022 Register z, Register zlen,
3023 Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3024 Register tmp5, Register tmp6, Register product_hi) {
3025
3026 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3027
3028 const Register idx = tmp1;
3029 const Register kdx = tmp2;
3030 const Register xstart = tmp3;
3031
3032 const Register y_idx = tmp4;
3033 const Register carry = tmp5;
3034 const Register product = xlen;
3035 const Register x_xstart = zlen; // reuse register
3036
3037 // First Loop.
3038 //
3039 // final static int64_t LONG_MASK = 0xffffffffL;
3040 // int xstart = xlen - 1;
3041 // int ystart = ylen - 1;
3042 // int64_t carry = 0;
3043 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3044 // int64_t product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
3045 // z[kdx] = (int)product;
3046 // carry = product >>> 32;
3047 // }
3048 // z[xstart] = (int)carry;
3049 //
3050
3051 movw(idx, ylen); // idx = ylen;
3052 movw(kdx, zlen); // kdx = xlen+ylen;
3053 mov(carry, zr); // carry = 0;
3054
3055 Label L_done;
3056
3057 movw(xstart, xlen);
3058 subsw(xstart, xstart, 1);
3059 br(Assembler::MI, L_done);
3060
3061 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3062
3063 Label L_second_loop;
3064 cbzw(kdx, L_second_loop);
3065
3066 Label L_carry;
3067 subw(kdx, kdx, 1);
3068 cbzw(kdx, L_carry);
3069
3070 strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
3071 lsr(carry, carry, 32);
3072 subw(kdx, kdx, 1);
3073
3074 bind(L_carry);
3075 strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
3076
3077 // Second and third (nested) loops.
3078 //
3079 // for (int i = xstart-1; i >= 0; i--) { // Second loop
3080 // carry = 0;
3081 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3082 // int64_t product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3083 // (z[k] & LONG_MASK) + carry;
3084 // z[k] = (int)product;
3085 // carry = product >>> 32;
3086 // }
3087 // z[i] = (int)carry;
3088 // }
3089 //
3090 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3091
3092 const Register jdx = tmp1;
3093
3094 bind(L_second_loop);
3095 mov(carry, zr); // carry = 0;
3096 movw(jdx, ylen); // j = ystart+1
3097
3098 subsw(xstart, xstart, 1); // i = xstart-1;
3099 br(Assembler::MI, L_done);
3100
3101 str(z, Address(pre(sp, -4 * wordSize)));
3102
3318
3319 sub(len, len, 64);
3320 add(buf, buf, 8);
3321 cmn(len, 128);
3322 br(Assembler::NE, CRC_less64);
3323 BIND(L_exit);
3324 mvnw(crc, crc);
3325 }
3326
3327 /**
3328 * @param crc register containing existing CRC (32-bit)
3329 * @param buf register pointing to input byte buffer (byte*)
3330 * @param len register containing number of bytes
3331 * @param table0..table3 registers that will contain the CRC table addresses
3332 * @param tmp, tmp2, tmp3 scratch registers
3333 */
3334 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
3335 Register table0, Register table1, Register table2, Register table3,
3336 Register tmp, Register tmp2, Register tmp3) {
3337 Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
3338 uint64_t offset;
3339
3340 if (UseCRC32) {
3341 kernel_crc32_using_crc32(crc, buf, len, table0, table1, table2, table3);
3342 return;
3343 }
3344
3345 mvnw(crc, crc);
3346
3347 adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset);
3348 if (offset) add(table0, table0, offset);
3349 add(table1, table0, 1*256*sizeof(juint));
3350 add(table2, table0, 2*256*sizeof(juint));
3351 add(table3, table0, 3*256*sizeof(juint));
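// The three extra tables are, presumably, byte-shifted variants of table0 in
// the usual slicing-by-4 arrangement: with four tables the word loops below
// can fold four input bytes per round of lookups instead of one.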
3352
3353 if (UseNeon) {
3354 cmp(len, (u1)64);
3355 br(Assembler::LT, L_by16);
3356 eor(v16, T16B, v16, v16);
3357
3358 Label L_fold;
3620 BIND(L_exit);
3621 }
3622
3623 /**
3624 * @param crc register containing existing CRC (32-bit)
3625 * @param buf register pointing to input byte buffer (byte*)
3626 * @param len register containing number of bytes
3627 * @param table0..table3 registers that will contain the CRC table addresses
3628 * @param tmp, tmp2, tmp3 scratch registers
3629 */
3630 void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len,
3631 Register table0, Register table1, Register table2, Register table3,
3632 Register tmp, Register tmp2, Register tmp3) {
3633 kernel_crc32c_using_crc32c(crc, buf, len, table0, table1, table2, table3);
3634 }
3635
3636
3637 SkipIfEqual::SkipIfEqual(
3638 MacroAssembler* masm, const bool* flag_addr, bool value) {
3639 _masm = masm;
3640 uint64_t offset;
3641 _masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset);
3642 _masm->ldrb(rscratch1, Address(rscratch1, offset));
3643 _masm->cbzw(rscratch1, _label);
3644 }
3645
3646 SkipIfEqual::~SkipIfEqual() {
3647 _masm->bind(_label);
3648 }
3649
3650 void MacroAssembler::addptr(const Address &dst, int32_t src) {
3651 Address adr;
3652 switch(dst.getMode()) {
3653 case Address::base_plus_offset:
3654 // This is the expected mode, although we allow all the other
3655 // forms below.
3656 adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord);
3657 break;
3658 default:
3659 lea(rscratch2, dst);
3660 adr = Address(rscratch2);
3661 break;
3662 }
3663 ldr(rscratch1, adr);
3664 add(rscratch1, rscratch1, src);
3665 str(rscratch1, adr);
3666 }
3667
3668 void MacroAssembler::cmpptr(Register src1, Address src2) {
3669 uint64_t offset;
3670 adrp(rscratch1, src2, offset);
3671 ldr(rscratch1, Address(rscratch1, offset));
3672 cmp(src1, rscratch1);
3673 }
3674
3675 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
3676 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3677 bs->obj_equals(this, obj1, obj2);
3678 }
3679
3680 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
3681 load_method_holder(rresult, rmethod);
3682 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
3683 }
3684
3685 void MacroAssembler::load_method_holder(Register holder, Register method) {
3686 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
3687 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
3688 ldr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
3689 }
4311 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
4312 ldr(dest, Address(rthread, Thread::polling_page_offset()));
4313 }
4314
4315 // Move the address of the polling page into r, then read the polling
4316 // page.
4317 address MacroAssembler::fetch_and_read_polling_page(Register r, relocInfo::relocType rtype) {
4318 get_polling_page(r, rtype);
4319 return read_polling_page(r, rtype);
4320 }
4321
4322 // Read the polling page. The address of the polling page must
4323 // already be in r.
4324 address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
4325 InstructionMark im(this);
4326 code_section()->relocate(inst_mark(), rtype);
4327 ldrw(zr, Address(r, 0));
4328 return inst_mark();
4329 }
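// Note that the destination is zr, so the loaded value is discarded: the load
// exists only to fault when the VM arms the polling page, and the relocation
// recorded above lets the signal handler recognize this pc as a safepoint
// poll.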
4330
4331 void MacroAssembler::adrp(Register reg1, const Address &dest, uint64_t &byte_offset) {
4332 relocInfo::relocType rtype = dest.rspec().reloc()->type();
4333 uint64_t low_page = (uint64_t)CodeCache::low_bound() >> 12;
4334 uint64_t high_page = (uint64_t)(CodeCache::high_bound() - 1) >> 12;
4335 uint64_t dest_page = (uint64_t)dest.target() >> 12;
4336 int64_t offset_low = dest_page - low_page;
4337 int64_t offset_high = dest_page - high_page;
4338
4339 assert(is_valid_AArch64_address(dest.target()), "bad address");
4340 assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
4341
4342 InstructionMark im(this);
4343 code_section()->relocate(inst_mark(), dest.rspec());
4344 // 8143067: Ensure that the adrp can reach the dest from anywhere within
4345 // the code cache so that if it is relocated we know it will still reach
4346 if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
4347 _adrp(reg1, dest.target());
4348 } else {
4349 uint64_t target = (uint64_t)dest.target();
4350 uint64_t adrp_target
4351 = (target & 0xffffffffUL) | ((uint64_t)pc() & 0xffff00000000UL);
4352
4353 _adrp(reg1, (address)adrp_target);
4354 movk(reg1, target >> 32, 32);
4355 }
4356 byte_offset = (uint64_t)dest.target() & 0xfff;
4357 }
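// In the out-of-range case above, the fabricated adrp target keeps the low 32
// bits of dest but borrows bits 32..47 from the current pc, so it always lies
// within adrp's +/-4GB reach; the movk then overwrites bits 32..47 with the
// real upper half. Cost: two instructions instead of one.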
4358
4359 void MacroAssembler::load_byte_map_base(Register reg) {
4360 CardTable::CardValue* byte_map_base =
4361 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
4362
4363 if (is_valid_AArch64_address((address)byte_map_base)) {
4364 // Strictly speaking the byte_map_base isn't an address at all,
4365 // and it might even be negative.
4366 uint64_t offset;
4367 adrp(reg, ExternalAddress((address)byte_map_base), offset);
4368 // We expect offset to be zero with most collectors.
4369 if (offset != 0) {
4370 add(reg, reg, offset);
4371 }
4372 } else {
4373 mov(reg, (uint64_t)byte_map_base);
4374 }
4375 }
4376
4377 void MacroAssembler::build_frame(int framesize) {
4378 assert(framesize > 0, "framesize must be > 0");
4379 if (framesize < ((1 << 9) + 2 * wordSize)) {
4380 sub(sp, sp, framesize);
4381 stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4382 if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
4383 } else {
4384 stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
4385 if (PreserveFramePointer) mov(rfp, sp);
4386 if (framesize < ((1 << 12) + 2 * wordSize))
4391 }
4392 }
4393 }
4394
4395 void MacroAssembler::remove_frame(int framesize) {
4396 assert(framesize > 0, "framesize must be > 0");
4397 if (framesize < ((1 << 9) + 2 * wordSize)) {
4398 ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
4399 add(sp, sp, framesize);
4400 } else {
4401 if (framesize < ((1 << 12) + 2 * wordSize))
4402 add(sp, sp, framesize - 2 * wordSize);
4403 else {
4404 mov(rscratch1, framesize - 2 * wordSize);
4405 add(sp, sp, rscratch1);
4406 }
4407 ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
4408 }
4409 }
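// e.g. remove_frame(64) takes the small-frame path and emits
//   ldp rfp, lr, [sp, #48]
//   add sp, sp, #64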
4410
4411 // This method checks whether the provided byte array contains a byte with the highest bit set.
4412 void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
4413 // The simple and most common case, a small aligned array that is not at the
4414 // end of a memory page, is handled here. All other cases are in the stub.
4415 Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
4416 const uint64_t UPPER_BIT_MASK = 0x8080808080808080;
4417 assert_different_registers(ary1, len, result);
4418
4419 cmpw(len, 0);
4420 br(LE, SET_RESULT);
4421 cmpw(len, 4 * wordSize);
4422 br(GE, STUB_LONG); // if size >= 32, go to the long stub
4423
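// Page-boundary trick: shifting the address left by (64 - log2(page_size))
// keeps only the in-page bits, so the add below carries out (CS) exactly when
// reading 4 * wordSize bytes would run off the end of the page.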
4424 int shift = 64 - exact_log2(os::vm_page_size());
4425 lsl(rscratch1, ary1, shift);
4426 mov(rscratch2, (size_t)(4 * wordSize) << shift);
4427 adds(rscratch2, rscratch1, rscratch2); // At end of page?
4428 br(CS, STUB); // would read past the end of the page, go to stub
4429 subs(len, len, wordSize);
4430 br(LT, END);
4788 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
4789 Label l;
4790 tbz(cnt, exact_log2(i), l);
4791 for (int j = 0; j < i; j += 2) {
4792 stp(zr, zr, post(ptr, 16));
4793 }
4794 bind(l);
4795 }
4796 {
4797 Label l;
4798 tbz(cnt, 0, l);
4799 str(zr, Address(ptr));
4800 bind(l);
4801 }
4802 BLOCK_COMMENT("} zero_words");
4803 }
4804
4805 // base: Address of a buffer to be zeroed, 8-byte aligned.
4806 // cnt: Immediate count in HeapWords.
4807 #define SmallArraySize (18 * BytesPerLong)
4808 void MacroAssembler::zero_words(Register base, uint64_t cnt)
4809 {
4810 BLOCK_COMMENT("zero_words {");
4811 int i = cnt & 1; // store any odd word to start
4812 if (i) str(zr, Address(base));
4813
4814 if (cnt <= SmallArraySize / BytesPerLong) {
4815 for (; i < (int)cnt; i += 2)
4816 stp(zr, zr, Address(base, i * wordSize));
4817 } else {
4818 const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
4819 int remainder = cnt % (2 * unroll);
4820 for (; i < remainder; i += 2)
4821 stp(zr, zr, Address(base, i * wordSize));
4822
4823 Label loop;
4824 Register cnt_reg = rscratch1;
4825 Register loop_base = rscratch2;
4826 cnt = cnt - remainder;
4827 mov(cnt_reg, cnt);
4828 // adjust base and prebias by -2 * wordSize so we can pre-increment