src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
8248238: Adding Windows support to OpenJDK on AArch64
Summary: LP64 vs LLP64 changes to add Windows support
Contributed-by: Monica Beckwith <monica.beckwith@microsoft.com>, Ludovic Henry <luhenry@microsoft.com>
Reviewed-by:
8248238: Adding Windows support to OpenJDK on AArch64
Summary: Adding Windows support for AArch64
Contributed-by: Ludovic Henry <luhenry@microsoft.com>, Monica Beckwith <monica.beckwith@microsoft.com>
Reviewed-by:
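The "LP64 vs LLP64" summary refers to the data-model difference behind most of these hunks: on LP64 targets (Linux/macOS AArch64) long is 64 bits, while on Windows (LLP64) long stays 32 bits, so code that used long/unsigned long to carry pointers or 64-bit offsets must move to the fixed-width stdint types. A minimal sketch of the point, not part of the patch:

#include <cstdint>

// On LP64 (Linux/macOS AArch64):  sizeof(long) == 8, sizeof(void*) == 8
// On LLP64 (Windows AArch64/x64): sizeof(long) == 4, sizeof(void*) == 8
static_assert(sizeof(void*) == 8, "64-bit target assumed");
static_assert(sizeof(int64_t) == 8 && sizeof(uint64_t) == 8,
              "fixed-width types are 64 bits on every data model");
// The following would fail to compile on Windows, which is why the patch
// replaces long/unsigned long with int64_t/uint64_t throughout:
// static_assert(sizeof(long) == sizeof(void*), "holds on LP64 only");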
*** 68,79 ****
// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
int instructions = 1;
! assert((uint64_t)target < (1ul << 48), "48-bit overflow in address constant");
! long offset = (target - branch) >> 2;
unsigned insn = *(unsigned*)branch;
if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
// Load register (literal)
Instruction_aarch64::spatch(branch, 23, 5, offset);
} else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
--- 68,79 ----
// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
int instructions = 1;
! assert((uint64_t)target < ((uint64_t)1 << 48), "48-bit overflow in address constant");
! int64_t offset = (target - branch) >> 2;
unsigned insn = *(unsigned*)branch;
if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
// Load register (literal)
Instruction_aarch64::spatch(branch, 23, 5, offset);
} else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
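The assert change in this hunk matters on LLP64: 1ul has type unsigned long, which is only 32 bits wide on Windows, so 1ul << 48 is an over-wide shift (undefined behaviour) and the 48-bit range check becomes meaningless. Widening the operand first keeps the bound intact. A small illustration, assuming an LLP64 compiler:

#include <cstdint>

// On LLP64, unsigned long is 32 bits; shifting it by 48 is undefined
// behaviour, so the intended 2^48 bound is lost.
// uint64_t bad = 1ul << 48;              // UB when sizeof(long) == 4
uint64_t good      = (uint64_t)1 << 48;   // 0x0001000000000000 on every model
uint64_t also_good = 1ull << 48;          // the spelling used later in movptr()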
*** 91,101 ****
} else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
// PC-rel. addressing
offset = target-branch;
int shift = Instruction_aarch64::extract(insn, 31, 31);
if (shift) {
! u_int64_t dest = (u_int64_t)target;
uint64_t pc_page = (uint64_t)branch >> 12;
uint64_t adr_page = (uint64_t)target >> 12;
unsigned offset_lo = dest & 0xfff;
offset = adr_page - pc_page;
--- 91,101 ----
} else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
// PC-rel. addressing
offset = target-branch;
int shift = Instruction_aarch64::extract(insn, 31, 31);
if (shift) {
! uint64_t dest = (uint64_t)target;
uint64_t pc_page = (uint64_t)branch >> 12;
uint64_t adr_page = (uint64_t)target >> 12;
unsigned offset_lo = dest & 0xfff;
offset = adr_page - pc_page;
*** 132,154 ****
} else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
Instruction_aarch64::extract(insn, 4, 0) ==
Instruction_aarch64::extract(insn2, 4, 0)) {
// movk #imm16<<32
Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
! long dest = ((long)target & 0xffffffffL) | ((long)branch & 0xffff00000000L);
! long pc_page = (long)branch >> 12;
! long adr_page = (long)dest >> 12;
offset = adr_page - pc_page;
instructions = 2;
}
}
int offset_lo = offset & 3;
offset >>= 2;
Instruction_aarch64::spatch(branch, 23, 5, offset);
Instruction_aarch64::patch(branch, 30, 29, offset_lo);
} else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
! u_int64_t dest = (u_int64_t)target;
// Move wide constant
assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
--- 132,154 ----
} else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
Instruction_aarch64::extract(insn, 4, 0) ==
Instruction_aarch64::extract(insn2, 4, 0)) {
// movk #imm16<<32
Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
! int64_t dest = ((int64_t)target & 0xffffffffL) | ((int64_t)branch & 0xffff00000000L);
! int64_t pc_page = (int64_t)branch >> 12;
! int64_t adr_page = (int64_t)dest >> 12;
offset = adr_page - pc_page;
instructions = 2;
}
}
int offset_lo = offset & 3;
offset >>= 2;
Instruction_aarch64::spatch(branch, 23, 5, offset);
Instruction_aarch64::patch(branch, 30, 29, offset_lo);
} else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
! uint64_t dest = (uint64_t)target;
// Move wide constant
assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
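The pc_page/adr_page arithmetic in this hunk is the reason the casts change from long to 64-bit types: with a 32-bit long on Windows, (long)branch would truncate the upper half of the address before the >> 12, and the page delta fed back into the ADRP encoding would be wrong. A hedged sketch of the computation (names are illustrative, not from the patch), assuming a 48-bit virtual address space:

#include <cstdint>

inline int64_t page_delta(const unsigned char* branch, const unsigned char* target) {
  int64_t pc_page  = (int64_t)(uintptr_t)branch >> 12;   // 4 KB pages
  int64_t adr_page = (int64_t)(uintptr_t)target >> 12;
  return adr_page - pc_page;   // must fit ADRP's signed 21-bit page immediate
}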
*** 203,213 ****
Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
return 2 * NativeInstruction::instruction_size;
}
address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
! long offset = 0;
if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
// Load register (literal)
offset = Instruction_aarch64::sextract(insn, 23, 5);
return address(((uint64_t)insn_addr + (offset << 2)));
} else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
--- 203,213 ----
Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
return 2 * NativeInstruction::instruction_size;
}
address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
! int64_t offset = 0;
if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
// Load register (literal)
offset = Instruction_aarch64::sextract(insn, 23, 5);
return address(((uint64_t)insn_addr + (offset << 2)));
} else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
*** 270,286 ****
}
} else {
ShouldNotReachHere();
}
} else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
! u_int32_t *insns = (u_int32_t *)insn_addr;
// Move wide constant: movz, movk, movk. See movptr().
assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
! return address(u_int64_t(Instruction_aarch64::extract(insns[0], 20, 5))
! + (u_int64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
! + (u_int64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
} else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
return 0;
} else {
ShouldNotReachHere();
--- 270,286 ----
}
} else {
ShouldNotReachHere();
}
} else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
! uint32_t *insns = (uint32_t *)insn_addr;
// Move wide constant: movz, movk, movk. See movptr().
assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
! return address(uint64_t(Instruction_aarch64::extract(insns[0], 20, 5))
! + (uint64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
! + (uint64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
} else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
return 0;
} else {
ShouldNotReachHere();
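target_addr_for_insn rebuilds the pointer from the three 16-bit immediates of the movz/movk/movk sequence emitted by movptr(); the hunk only swaps the BSD-style u_int64_t/u_int32_t spellings for the standard ones. A sketch of the reconstruction with hypothetical field values:

#include <cstdint>

// Hypothetical: given the three 16-bit immediates extracted from
// movz / movk(#16) / movk(#32), the 48-bit address is reassembled as:
inline uint64_t rebuild_48bit(uint16_t lo, uint16_t mid, uint16_t hi) {
  return (uint64_t)lo
       + ((uint64_t)mid << 16)
       + ((uint64_t)hi  << 32);   // bits 48..63 stay zero by construction
}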
*** 387,397 ****
void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
assert(CodeCache::find_blob(entry.target()) != NULL,
"destination of far call not found in code cache");
if (far_branches()) {
! unsigned long offset;
// We can use ADRP here because we know that the total size of
// the code cache cannot exceed 2Gb.
adrp(tmp, entry, offset);
add(tmp, tmp, offset);
if (cbuf) cbuf->set_insts_mark();
--- 387,397 ----
void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
assert(CodeCache::find_blob(entry.target()) != NULL,
"destination of far call not found in code cache");
if (far_branches()) {
! uint64_t offset;
// We can use ADRP here because we know that the total size of
// the code cache cannot exceed 2Gb.
adrp(tmp, entry, offset);
add(tmp, tmp, offset);
if (cbuf) cbuf->set_insts_mark();
*** 405,415 ****
void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
assert(CodeCache::find_blob(entry.target()) != NULL,
"destination of far call not found in code cache");
if (far_branches()) {
! unsigned long offset;
// We can use ADRP here because we know that the total size of
// the code cache cannot exceed 2Gb.
adrp(tmp, entry, offset);
add(tmp, tmp, offset);
if (cbuf) cbuf->set_insts_mark();
--- 405,415 ----
void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
assert(CodeCache::find_blob(entry.target()) != NULL,
"destination of far call not found in code cache");
if (far_branches()) {
! uint64_t offset;
// We can use ADRP here because we know that the total size of
// the code cache cannot exceed 2Gb.
adrp(tmp, entry, offset);
add(tmp, tmp, offset);
if (cbuf) cbuf->set_insts_mark();
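In far_call/far_jump the ADRP/ADD pair materialises the destination, relying on the code cache spanning less than 4 GB; offset receives the low 12 bits of the target from adrp() and must be a 64-bit unsigned on Windows as well. A hypothetical illustration of what the pair computes:

#include <cstdint>

// The address produced by adrp(tmp, entry, offset); add(tmp, tmp, offset);
// is the target's 4 KB page base plus its low 12 bits.
inline uint64_t adrp_add_result(uint64_t target) {
  uint64_t page_base = target & ~(uint64_t)0xfff;  // what ADRP leaves in tmp
  uint64_t offset    = target &  (uint64_t)0xfff;  // the adrp() out-parameter
  return page_base + offset;                       // the ADD step
}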
*** 822,832 ****
}
address MacroAssembler::ic_call(address entry, jint method_index) {
RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
// address const_ptr = long_constant((jlong)Universe::non_oop_word());
! // unsigned long offset;
// ldr_constant(rscratch2, const_ptr);
movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
return trampoline_call(Address(entry, rh));
}
--- 822,832 ----
}
address MacroAssembler::ic_call(address entry, jint method_index) {
RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
// address const_ptr = long_constant((jlong)Universe::non_oop_word());
! // uint64_t offset;
// ldr_constant(rscratch2, const_ptr);
movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
return trampoline_call(Address(entry, rh));
}
*** 1489,1499 ****
// MacroAssembler protected routines needed to implement
// public methods
void MacroAssembler::mov(Register r, Address dest) {
code_section()->relocate(pc(), dest.rspec());
! u_int64_t imm64 = (u_int64_t)dest.target();
movptr(r, imm64);
}
// Move a constant pointer into r. In AArch64 mode the virtual
// address space is 48 bits in size, so we only need three
--- 1489,1499 ----
// MacroAssembler protected routines needed to implement
// public methods
void MacroAssembler::mov(Register r, Address dest) {
code_section()->relocate(pc(), dest.rspec());
! uint64_t imm64 = (uint64_t)dest.target();
movptr(r, imm64);
}
// Move a constant pointer into r. In AArch64 mode the virtual
// address space is 48 bits in size, so we only need three
*** 1505,1515 ****
char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
block_comment(buffer);
}
#endif
! assert(imm64 < (1ul << 48), "48-bit overflow in address constant");
movz(r, imm64 & 0xffff);
imm64 >>= 16;
movk(r, imm64 & 0xffff, 16);
imm64 >>= 16;
movk(r, imm64 & 0xffff, 32);
--- 1505,1515 ----
char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
block_comment(buffer);
}
#endif
! assert(imm64 < (1ull << 48), "48-bit overflow in address constant");
movz(r, imm64 & 0xffff);
imm64 >>= 16;
movk(r, imm64 & 0xffff, 16);
imm64 >>= 16;
movk(r, imm64 & 0xffff, 32);
*** 1522,1545 ****
// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh
// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
// T1D/T2D: invalid
! void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
assert(T != T1D && T != T2D, "invalid arrangement");
if (T == T8B || T == T16B) {
assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
movi(Vd, T, imm32 & 0xff, 0);
return;
}
! u_int32_t nimm32 = ~imm32;
if (T == T4H || T == T8H) {
assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
imm32 &= 0xffff;
nimm32 &= 0xffff;
}
! u_int32_t x = imm32;
int movi_cnt = 0;
int movn_cnt = 0;
while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
x = nimm32;
while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
--- 1522,1545 ----
// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh
// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
// T1D/T2D: invalid
! void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32) {
assert(T != T1D && T != T2D, "invalid arrangement");
if (T == T8B || T == T16B) {
assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
movi(Vd, T, imm32 & 0xff, 0);
return;
}
! uint32_t nimm32 = ~imm32;
if (T == T4H || T == T8H) {
assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
imm32 &= 0xffff;
nimm32 &= 0xffff;
}
! uint32_t x = imm32;
int movi_cnt = 0;
int movn_cnt = 0;
while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
x = nimm32;
while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
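mov(FloatRegister, SIMD_Arrangement, imm32) counts the non-zero bytes of the constant and of its complement to decide whether the positive (movi/orri) or the negated (mvni-style) build needs fewer instructions; only the u_int32_t spelling changes here. A self-contained sketch of that counting step (hypothetical helper, not in the patch):

#include <cstdint>

// Count how many byte lanes are needed when building imm32 positively
// versus from its bitwise complement, as the diffed code does.
inline bool prefer_negated_form(uint32_t imm32) {
  uint32_t nimm32 = ~imm32;
  int movi_cnt = 0, movn_cnt = 0;
  for (uint32_t x = imm32;  x; x >>= 8) if (x & 0xff) movi_cnt++;
  for (uint32_t x = nimm32; x; x >>= 8) if (x & 0xff) movn_cnt++;
  return movn_cnt < movi_cnt;   // fewer pieces when starting from ~imm32
}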
*** 1559,1569 ****
orri(Vd, T, imm32 & 0xff, lsl);
lsl += 8; imm32 >>= 8;
}
}
! void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
{
#ifndef PRODUCT
{
char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
--- 1559,1569 ----
orri(Vd, T, imm32 & 0xff, lsl);
lsl += 8; imm32 >>= 8;
}
}
! void MacroAssembler::mov_immediate64(Register dst, uint64_t imm64)
{
#ifndef PRODUCT
{
char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
*** 1573,1583 ****
if (operand_valid_for_logical_immediate(false, imm64)) {
orr(dst, zr, imm64);
} else {
// we can use a combination of MOVZ or MOVN with
// MOVK to build up the constant
! u_int64_t imm_h[4];
int zero_count = 0;
int neg_count = 0;
int i;
for (i = 0; i < 4; i++) {
imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
--- 1573,1583 ----
if (operand_valid_for_logical_immediate(false, imm64)) {
orr(dst, zr, imm64);
} else {
// we can use a combination of MOVZ or MOVN with
// MOVK to build up the constant
! uint64_t imm_h[4];
int zero_count = 0;
int neg_count = 0;
int i;
for (i = 0; i < 4; i++) {
imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
*** 1594,1682 ****
// one MOVN will do
movn(dst, 0);
} else if (zero_count == 3) {
for (i = 0; i < 4; i++) {
if (imm_h[i] != 0L) {
! movz(dst, (u_int32_t)imm_h[i], (i << 4));
break;
}
}
} else if (neg_count == 3) {
// one MOVN will do
for (int i = 0; i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
break;
}
}
} else if (zero_count == 2) {
// one MOVZ and one MOVK will do
for (i = 0; i < 3; i++) {
if (imm_h[i] != 0L) {
! movz(dst, (u_int32_t)imm_h[i], (i << 4));
i++;
break;
}
}
for (;i < 4; i++) {
if (imm_h[i] != 0L) {
! movk(dst, (u_int32_t)imm_h[i], (i << 4));
}
}
} else if (neg_count == 2) {
// one MOVN and one MOVK will do
for (i = 0; i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
i++;
break;
}
}
for (;i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movk(dst, (u_int32_t)imm_h[i], (i << 4));
}
}
} else if (zero_count == 1) {
// one MOVZ and two MOVKs will do
for (i = 0; i < 4; i++) {
if (imm_h[i] != 0L) {
! movz(dst, (u_int32_t)imm_h[i], (i << 4));
i++;
break;
}
}
for (;i < 4; i++) {
if (imm_h[i] != 0x0L) {
! movk(dst, (u_int32_t)imm_h[i], (i << 4));
}
}
} else if (neg_count == 1) {
// one MOVN and two MOVKs will do
for (i = 0; i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
i++;
break;
}
}
for (;i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movk(dst, (u_int32_t)imm_h[i], (i << 4));
}
}
} else {
// use a MOVZ and 3 MOVKs (makes it easier to debug)
! movz(dst, (u_int32_t)imm_h[0], 0);
for (i = 1; i < 4; i++) {
! movk(dst, (u_int32_t)imm_h[i], (i << 4));
}
}
}
}
! void MacroAssembler::mov_immediate32(Register dst, u_int32_t imm32)
{
#ifndef PRODUCT
{
char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX32, imm32);
--- 1594,1682 ----
// one MOVN will do
movn(dst, 0);
} else if (zero_count == 3) {
for (i = 0; i < 4; i++) {
if (imm_h[i] != 0L) {
! movz(dst, (uint32_t)imm_h[i], (i << 4));
break;
}
}
} else if (neg_count == 3) {
// one MOVN will do
for (int i = 0; i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
break;
}
}
} else if (zero_count == 2) {
// one MOVZ and one MOVK will do
for (i = 0; i < 3; i++) {
if (imm_h[i] != 0L) {
! movz(dst, (uint32_t)imm_h[i], (i << 4));
i++;
break;
}
}
for (;i < 4; i++) {
if (imm_h[i] != 0L) {
! movk(dst, (uint32_t)imm_h[i], (i << 4));
}
}
} else if (neg_count == 2) {
// one MOVN and one MOVK will do
for (i = 0; i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
i++;
break;
}
}
for (;i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movk(dst, (uint32_t)imm_h[i], (i << 4));
}
}
} else if (zero_count == 1) {
// one MOVZ and two MOVKs will do
for (i = 0; i < 4; i++) {
if (imm_h[i] != 0L) {
! movz(dst, (uint32_t)imm_h[i], (i << 4));
i++;
break;
}
}
for (;i < 4; i++) {
if (imm_h[i] != 0x0L) {
! movk(dst, (uint32_t)imm_h[i], (i << 4));
}
}
} else if (neg_count == 1) {
// one MOVN and two MOVKs will do
for (i = 0; i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
i++;
break;
}
}
for (;i < 4; i++) {
if (imm_h[i] != 0xffffL) {
! movk(dst, (uint32_t)imm_h[i], (i << 4));
}
}
} else {
// use a MOVZ and 3 MOVKs (makes it easier to debug)
! movz(dst, (uint32_t)imm_h[0], 0);
for (i = 1; i < 4; i++) {
! movk(dst, (uint32_t)imm_h[i], (i << 4));
}
}
}
}
! void MacroAssembler::mov_immediate32(Register dst, uint32_t imm32)
{
#ifndef PRODUCT
{
char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX32, imm32);
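mov_immediate64 splits the constant into four 16-bit halfwords and seeds the sequence with MOVZ (when most halfwords are zero) or MOVN (when most are 0xffff), filling the remainder with MOVK; the patch again only touches the integer type spellings. A worked sketch of the classification step, assuming the same halfword split:

#include <cstdint>

// Classify the four halfwords of imm64 the way mov_immediate64 does
// before choosing a MOVZ- or MOVN-based sequence.
inline void classify_halfwords(uint64_t imm64, int& zero_count, int& neg_count) {
  zero_count = 0;
  neg_count  = 0;
  for (int i = 0; i < 4; i++) {
    uint64_t h = (imm64 >> (i * 16)) & 0xffff;
    if (h == 0)           zero_count++;
    else if (h == 0xffff) neg_count++;
  }
  // e.g. imm64 = 0x00000000dead0000 gives zero_count == 3,
  // so a single movz(dst, 0xdead, 16) suffices.
}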
*** 1686,1696 ****
if (operand_valid_for_logical_immediate(true, imm32)) {
orrw(dst, zr, imm32);
} else {
// we can use MOVZ, MOVN or two calls to MOVK to build up the
// constant
! u_int32_t imm_h[2];
imm_h[0] = imm32 & 0xffff;
imm_h[1] = ((imm32 >> 16) & 0xffff);
if (imm_h[0] == 0) {
movzw(dst, imm_h[1], 16);
} else if (imm_h[0] == 0xffff) {
--- 1686,1696 ----
if (operand_valid_for_logical_immediate(true, imm32)) {
orrw(dst, zr, imm32);
} else {
// we can use MOVZ, MOVN or two calls to MOVK to build up the
// constant
! uint32_t imm_h[2];
imm_h[0] = imm32 & 0xffff;
imm_h[1] = ((imm32 >> 16) & 0xffff);
if (imm_h[0] == 0) {
movzw(dst, imm_h[1], 16);
} else if (imm_h[0] == 0xffff) {
*** 1709,1719 ****
// Form an address from base + offset in Rd. Rd may or may
// not actually be used: you must use the Address that is returned.
// It is up to you to ensure that the shift provided matches the size
// of your data.
! Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) {
if (Address::offset_ok_for_immed(byte_offset, shift))
// It fits; no need for any heroics
return Address(base, byte_offset);
// Don't do anything clever with negative or misaligned offsets
--- 1709,1719 ----
// Form an address from base + offset in Rd. Rd may or may
// not actually be used: you must use the Address that is returned.
// It is up to you to ensure that the shift provided matches the size
// of your data.
! Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset, int shift) {
if (Address::offset_ok_for_immed(byte_offset, shift))
// It fits; no need for any heroics
return Address(base, byte_offset);
// Don't do anything clever with negative or misaligned offsets
*** 1724,1735 ****
return Address(Rd);
}
// See if we can do this with two 12-bit offsets
{
! unsigned long word_offset = byte_offset >> shift;
! unsigned long masked_offset = word_offset & 0xfff000;
if (Address::offset_ok_for_immed(word_offset - masked_offset, 0)
&& Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
add(Rd, base, masked_offset << shift);
word_offset -= masked_offset;
return Address(Rd, word_offset << shift);
--- 1724,1735 ----
return Address(Rd);
}
// See if we can do this with two 12-bit offsets
{
! uint64_t word_offset = byte_offset >> shift;
! uint64_t masked_offset = word_offset & 0xfff000;
if (Address::offset_ok_for_immed(word_offset - masked_offset, 0)
&& Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
add(Rd, base, masked_offset << shift);
word_offset -= masked_offset;
return Address(Rd, word_offset << shift);
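When the scaled offset does not fit a single unsigned 12-bit immediate, form_address peels off the bits that an ADD immediate (LSL #12) can absorb and leaves the remainder as the access offset; the change is only that the intermediates are now uint64_t. A small sketch of the split with hypothetical names (the residual part is still re-checked against offset_ok_for_immed in the real code):

#include <cstdint>

inline void split_offset(uint64_t word_offset, uint64_t& add_part, uint64_t& residual) {
  add_part = word_offset & 0xfff000;   // becomes add(Rd, base, add_part << shift)
  residual = word_offset - add_part;   // stays as the immediate of the final access
}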
*** 1966,1976 ****
if (value < 0) { increment(reg, -value); return; }
if (value == 0) { return; }
if (value < (1 << 12)) { sub(reg, reg, value); return; }
/* else */ {
assert(reg != rscratch2, "invalid dst for register decrement");
! mov(rscratch2, (unsigned long)value);
sub(reg, reg, rscratch2);
}
}
void MacroAssembler::decrementw(Address dst, int value)
--- 1966,1976 ----
if (value < 0) { increment(reg, -value); return; }
if (value == 0) { return; }
if (value < (1 << 12)) { sub(reg, reg, value); return; }
/* else */ {
assert(reg != rscratch2, "invalid dst for register decrement");
! mov(rscratch2, (uint64_t) value);
sub(reg, reg, rscratch2);
}
}
void MacroAssembler::decrementw(Address dst, int value)
*** 2598,2608 ****
fatal("DEBUG MESSAGE: %s", msg);
}
void MacroAssembler::push_call_clobbered_registers() {
int step = 4 * wordSize;
! push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
sub(sp, sp, step);
mov(rscratch1, -step);
// Push v0-v7, v16-v31.
for (int i = 31; i>= 4; i -= 4) {
if (i <= v7->encoding() || i >= v16->encoding())
--- 2598,2608 ----
fatal("DEBUG MESSAGE: %s", msg);
}
void MacroAssembler::push_call_clobbered_registers() {
int step = 4 * wordSize;
! push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) WIN64_ONLY(- r18), sp);
sub(sp, sp, step);
mov(rscratch1, -step);
// Push v0-v7, v16-v31.
for (int i = 31; i>= 4; i -= 4) {
if (i <= v7->encoding() || i >= v16->encoding())
*** 2617,2628 ****
for (int i = 0; i < 32; i += 4) {
if (i <= v7->encoding() || i >= v16->encoding())
ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
}
!
! pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
}
void MacroAssembler::push_CPU_state(bool save_vectors) {
int step = (save_vectors ? 8 : 4) * wordSize;
push(0x3fffffff, sp); // integer registers except lr & sp
--- 2617,2627 ----
for (int i = 0; i < 32; i += 4) {
if (i <= v7->encoding() || i >= v16->encoding())
ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
}
! pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) WIN64_ONLY(- r18), sp);
}
void MacroAssembler::push_CPU_state(bool save_vectors) {
int step = (save_vectors ? 8 : 4) * wordSize;
push(0x3fffffff, sp); // integer registers except lr & sp
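The functional change in this pair of hunks is the WIN64_ONLY(- r18) term: on Windows on Arm64, x18 is the reserved platform register (it holds the TEB pointer), so it is excluded from the set of call-clobbered registers that get pushed and popped on that OS. Presumably WIN64_ONLY expands to its argument on Windows builds and to nothing elsewhere; the exact definition lives in a shared header and is an assumption here:

// Assumed shape of the conditional macro (not part of this file):
#ifdef _WIN64
#define WIN64_ONLY(code) code    // Windows: drop r18 from the RegSet
#else
#define WIN64_ONLY(code)         // elsewhere: r18 is an ordinary register
#endif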
*** 2690,2712 ****
// Checks whether offset is aligned.
// Returns true if it is, else false.
bool MacroAssembler::merge_alignment_check(Register base,
size_t size,
! long cur_offset,
! long prev_offset) const {
if (AvoidUnalignedAccesses) {
if (base == sp) {
// Checks whether low offset is aligned to pair of registers.
! long pair_mask = size * 2 - 1;
! long offset = prev_offset > cur_offset ? cur_offset : prev_offset;
return (offset & pair_mask) == 0;
} else { // If base is not sp, we can't guarantee the access is aligned.
return false;
}
} else {
! long mask = size - 1;
// Load/store pair instruction only supports element size aligned offset.
return (cur_offset & mask) == 0 && (prev_offset & mask) == 0;
}
}
--- 2689,2711 ----
// Checks whether offset is aligned.
// Returns true if it is, else false.
bool MacroAssembler::merge_alignment_check(Register base,
size_t size,
! int64_t cur_offset,
! int64_t prev_offset) const {
if (AvoidUnalignedAccesses) {
if (base == sp) {
// Checks whether low offset is aligned to pair of registers.
! int64_t pair_mask = size * 2 - 1;
! int64_t offset = prev_offset > cur_offset ? cur_offset : prev_offset;
return (offset & pair_mask) == 0;
} else { // If base is not sp, we can't guarantee the access is aligned.
return false;
}
} else {
! int64_t mask = size - 1;
// Load/store pair instruction only supports element size aligned offset.
return (cur_offset & mask) == 0 && (prev_offset & mask) == 0;
}
}
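merge_alignment_check tests whether the lower of the two neighbouring offsets is aligned to twice the element size, the natural alignment of an ldp/stp pair; all that changes is that the offsets are carried as int64_t instead of long. A hedged, self-contained sketch of the sp-based case:

#include <cstdint>
#include <cstddef>

// Is the lower of two neighbouring offsets aligned for an ldp/stp pair
// of 'size'-byte elements?
inline bool pair_aligned(size_t size, int64_t cur_offset, int64_t prev_offset) {
  int64_t pair_mask = (int64_t)(size * 2) - 1;
  int64_t low = prev_offset < cur_offset ? prev_offset : cur_offset;
  return (low & pair_mask) == 0;   // e.g. 8-byte elements need 16-byte alignment
}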
*** 2735,2756 ****
if (cur_size_in_bytes != prev_size_in_bytes || is_store != prev_ldst->is_store()) {
return false;
}
! long max_offset = 63 * prev_size_in_bytes;
! long min_offset = -64 * prev_size_in_bytes;
assert(prev_ldst->is_not_pre_post_index(), "pre-index or post-index is not supported to be merged.");
// Only same base can be merged.
if (adr.base() != prev_ldst->base()) {
return false;
}
! long cur_offset = adr.offset();
! long prev_offset = prev_ldst->offset();
size_t diff = abs(cur_offset - prev_offset);
if (diff != prev_size_in_bytes) {
return false;
}
--- 2734,2755 ----
if (cur_size_in_bytes != prev_size_in_bytes || is_store != prev_ldst->is_store()) {
return false;
}
! int64_t max_offset = 63 * prev_size_in_bytes;
! int64_t min_offset = -64 * prev_size_in_bytes;
assert(prev_ldst->is_not_pre_post_index(), "pre-index or post-index is not supported to be merged.");
// Only same base can be merged.
if (adr.base() != prev_ldst->base()) {
return false;
}
! int64_t cur_offset = adr.offset();
! int64_t prev_offset = prev_ldst->offset();
size_t diff = abs(cur_offset - prev_offset);
if (diff != prev_size_in_bytes) {
return false;
}
*** 2763,2773 ****
// If t1 and t2 is the same in "ldp t1, t2, [xn, #imm]", we'll get SIGILL.
if (!is_store && (adr.base() == prev_ldst->target() || rt == prev_ldst->target())) {
return false;
}
! long low_offset = prev_offset > cur_offset ? cur_offset : prev_offset;
// Offset range must be in ldp/stp instruction's range.
if (low_offset > max_offset || low_offset < min_offset) {
return false;
}
--- 2762,2772 ----
// If t1 and t2 is the same in "ldp t1, t2, [xn, #imm]", we'll get SIGILL.
if (!is_store && (adr.base() == prev_ldst->target() || rt == prev_ldst->target())) {
return false;
}
! int64_t low_offset = prev_offset > cur_offset ? cur_offset : prev_offset;
// Offset range must be in ldp/stp instruction's range.
if (low_offset > max_offset || low_offset < min_offset) {
return false;
}
*** 2788,2798 ****
Register rt_low, rt_high;
address prev = pc() - NativeInstruction::instruction_size;
NativeLdSt* prev_ldst = NativeLdSt_at(prev);
! long offset;
if (adr.offset() < prev_ldst->offset()) {
offset = adr.offset();
rt_low = rt;
rt_high = prev_ldst->target();
--- 2787,2797 ----
Register rt_low, rt_high;
address prev = pc() - NativeInstruction::instruction_size;
NativeLdSt* prev_ldst = NativeLdSt_at(prev);
! int64_t offset;
if (adr.offset() < prev_ldst->offset()) {
offset = adr.offset();
rt_low = rt;
rt_high = prev_ldst->target();
*** 3035,3050 ****
const Register product = xlen;
const Register x_xstart = zlen; // reuse register
// First Loop.
//
! // final static long LONG_MASK = 0xffffffffL;
// int xstart = xlen - 1;
// int ystart = ylen - 1;
! // long carry = 0;
// for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
! // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
// z[kdx] = (int)product;
// carry = product >>> 32;
// }
// z[xstart] = (int)carry;
//
--- 3034,3049 ----
const Register product = xlen;
const Register x_xstart = zlen; // reuse register
// First Loop.
//
! // final static int64_t LONG_MASK = 0xffffffffL;
// int xstart = xlen - 1;
// int ystart = ylen - 1;
! // int64_t carry = 0;
// for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
! // int64_t product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
// z[kdx] = (int)product;
// carry = product >>> 32;
// }
// z[xstart] = (int)carry;
//
*** 3078,3088 ****
// Second and third (nested) loops.
//
// for (int i = xstart-1; i >= 0; i--) { // Second loop
// carry = 0;
// for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
! // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
// (z[k] & LONG_MASK) + carry;
// z[k] = (int)product;
// carry = product >>> 32;
// }
// z[i] = (int)carry;
--- 3077,3087 ----
// Second and third (nested) loops.
//
// for (int i = xstart-1; i >= 0; i--) { // Second loop
// carry = 0;
// for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
! // int64_t product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
// (z[k] & LONG_MASK) + carry;
// z[k] = (int)product;
// carry = product >>> 32;
// }
// z[i] = (int)carry;
*** 3334,3344 ****
*/
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
Register table0, Register table1, Register table2, Register table3,
Register tmp, Register tmp2, Register tmp3) {
Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
! unsigned long offset;
if (UseCRC32) {
kernel_crc32_using_crc32(crc, buf, len, table0, table1, table2, table3);
return;
}
--- 3333,3343 ----
*/
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
Register table0, Register table1, Register table2, Register table3,
Register tmp, Register tmp2, Register tmp3) {
Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
! uint64_t offset;
if (UseCRC32) {
kernel_crc32_using_crc32(crc, buf, len, table0, table1, table2, table3);
return;
}
*** 3636,3646 ****
SkipIfEqual::SkipIfEqual(
MacroAssembler* masm, const bool* flag_addr, bool value) {
_masm = masm;
! unsigned long offset;
_masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset);
_masm->ldrb(rscratch1, Address(rscratch1, offset));
_masm->cbzw(rscratch1, _label);
}
--- 3635,3645 ----
SkipIfEqual::SkipIfEqual(
MacroAssembler* masm, const bool* flag_addr, bool value) {
_masm = masm;
! uint64_t offset;
_masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset);
_masm->ldrb(rscratch1, Address(rscratch1, offset));
_masm->cbzw(rscratch1, _label);
}
*** 3665,3675 ****
add(rscratch1, rscratch1, src);
str(rscratch1, adr);
}
void MacroAssembler::cmpptr(Register src1, Address src2) {
! unsigned long offset;
adrp(rscratch1, src2, offset);
ldr(rscratch1, Address(rscratch1, offset));
cmp(src1, rscratch1);
}
--- 3664,3674 ----
add(rscratch1, rscratch1, src);
str(rscratch1, adr);
}
void MacroAssembler::cmpptr(Register src1, Address src2) {
! uint64_t offset;
adrp(rscratch1, src2, offset);
ldr(rscratch1, Address(rscratch1, offset));
cmp(src1, rscratch1);
}
*** 4327,4343 ****
code_section()->relocate(inst_mark(), rtype);
ldrw(zr, Address(r, 0));
return inst_mark();
}
! void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
relocInfo::relocType rtype = dest.rspec().reloc()->type();
! unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12;
! unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12;
! unsigned long dest_page = (unsigned long)dest.target() >> 12;
! long offset_low = dest_page - low_page;
! long offset_high = dest_page - high_page;
assert(is_valid_AArch64_address(dest.target()), "bad address");
assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
InstructionMark im(this);
--- 4326,4342 ----
code_section()->relocate(inst_mark(), rtype);
ldrw(zr, Address(r, 0));
return inst_mark();
}
! void MacroAssembler::adrp(Register reg1, const Address &dest, uint64_t &byte_offset) {
relocInfo::relocType rtype = dest.rspec().reloc()->type();
! uint64_t low_page = (uint64_t)CodeCache::low_bound() >> 12;
! uint64_t high_page = (uint64_t)(CodeCache::high_bound() - 1) >> 12;
! uint64_t dest_page = (uint64_t)dest.target() >> 12;
! int64_t offset_low = dest_page - low_page;
! int64_t offset_high = dest_page - high_page;
assert(is_valid_AArch64_address(dest.target()), "bad address");
assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
InstructionMark im(this);
*** 4345,4372 ****
// 8143067: Ensure that the adrp can reach the dest from anywhere within
// the code cache so that if it is relocated we know it will still reach
if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
_adrp(reg1, dest.target());
} else {
! unsigned long target = (unsigned long)dest.target();
! unsigned long adrp_target
! = (target & 0xffffffffUL) | ((unsigned long)pc() & 0xffff00000000UL);
_adrp(reg1, (address)adrp_target);
movk(reg1, target >> 32, 32);
}
! byte_offset = (unsigned long)dest.target() & 0xfff;
}
void MacroAssembler::load_byte_map_base(Register reg) {
CardTable::CardValue* byte_map_base =
((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
if (is_valid_AArch64_address((address)byte_map_base)) {
// Strictly speaking the byte_map_base isn't an address at all,
// and it might even be negative.
! unsigned long offset;
adrp(reg, ExternalAddress((address)byte_map_base), offset);
// We expect offset to be zero with most collectors.
if (offset != 0) {
add(reg, reg, offset);
}
--- 4344,4371 ----
// 8143067: Ensure that the adrp can reach the dest from anywhere within
// the code cache so that if it is relocated we know it will still reach
if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
_adrp(reg1, dest.target());
} else {
! uint64_t target = (uint64_t)dest.target();
! uint64_t adrp_target
! = (target & 0xffffffffUL) | ((uint64_t)pc() & 0xffff00000000UL);
_adrp(reg1, (address)adrp_target);
movk(reg1, target >> 32, 32);
}
! byte_offset = (uint64_t)dest.target() & 0xfff;
}
void MacroAssembler::load_byte_map_base(Register reg) {
CardTable::CardValue* byte_map_base =
((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
if (is_valid_AArch64_address((address)byte_map_base)) {
// Strictly speaking the byte_map_base isn't an address at all,
// and it might even be negative.
! uint64_t offset;
adrp(reg, ExternalAddress((address)byte_map_base), offset);
// We expect offset to be zero with most collectors.
if (offset != 0) {
add(reg, reg, offset);
}
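adrp() first checks whether the destination page is reachable from every point in the code cache, i.e. within ADRP's signed 21-bit page immediate (roughly +/- 4 GB); if not, it builds a synthetic target whose low 32 bits match the destination and whose bits 32..47 match the current PC, emits ADRP to that, and patches the top 16 bits with MOVK. The diff again only moves from long/unsigned long to fixed-width types. A sketch of the reachability test, with hypothetical inputs:

#include <cstdint>

// Can a single ADRP at any PC inside [low, high) reach 'dest'?
// ADRP's page immediate covers +/- 1M 4 KB pages, about +/- 4 GB.
inline bool adrp_reaches_from_anywhere(uint64_t low, uint64_t high, uint64_t dest) {
  int64_t offset_low  = (int64_t)((dest >> 12) - (low >> 12));
  int64_t offset_high = (int64_t)((dest >> 12) - ((high - 1) >> 12));
  return offset_high >= -(1 << 20) && offset_low < (1 << 20);
}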
*** 4407,4417 ****
}
ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
}
}
-
// This method checks if provided byte array contains byte with highest bit set.
void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
// Simple and most common case of aligned small array which is not at the
// end of memory page is placed here. All other cases are in stub.
Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
--- 4406,4415 ----
*** 4805,4815 ****
}
// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt: Immediate count in HeapWords.
#define SmallArraySize (18 * BytesPerLong)
! void MacroAssembler::zero_words(Register base, u_int64_t cnt)
{
BLOCK_COMMENT("zero_words {");
int i = cnt & 1; // store any odd word to start
if (i) str(zr, Address(base));
--- 4803,4813 ----
}
// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt: Immediate count in HeapWords.
#define SmallArraySize (18 * BytesPerLong)
! void MacroAssembler::zero_words(Register base, uint64_t cnt)
{
BLOCK_COMMENT("zero_words {");
int i = cnt & 1; // store any odd word to start
if (i) str(zr, Address(base));