< prev index next >
src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Print this page
*** 5538,5567 ****
// Inflate byte[] array to char[].
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
Register tmp4) {
! Label big, done;
assert_different_registers(src, dst, len, tmp4, rscratch1);
! fmovd(vtmp1 , zr);
! lsrw(rscratch1, len, 3);
!
! cbnzw(rscratch1, big);
!
// Short string: less than 8 bytes.
{
! Label loop, around, tiny;
!
! subsw(len, len, 4);
! andw(len, len, 3);
! br(LO, tiny);
// Use SIMD to do 4 bytes.
ldrs(vtmp2, post(src, 4));
zip1(vtmp3, T8B, vtmp2, vtmp1);
strd(vtmp3, post(dst, 8));
cbzw(len, done);
// Do the remaining bytes by steam.
--- 5538,5565 ----
// Inflate byte[] array to char[].
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
Register tmp4) {
! Label big, done, after_init, to_stub;
assert_different_registers(src, dst, len, tmp4, rscratch1);
! fmovd(vtmp1, zr);
! lsrw(tmp4, len, 3);
! bind(after_init);
! cbnzw(tmp4, big);
// Short string: less than 8 bytes.
{
! Label loop, tiny;
+ cmpw(len, 4);
+ br(LT, tiny);
// Use SIMD to do 4 bytes.
ldrs(vtmp2, post(src, 4));
zip1(vtmp3, T8B, vtmp2, vtmp1);
+ subw(len, len, 4);
strd(vtmp3, post(dst, 8));
cbzw(len, done);
// Do the remaining bytes by steam.
*** 5571,5609 ****
subw(len, len, 1);
bind(tiny);
cbnz(len, loop);
- bind(around);
b(done);
}
// Unpack the bytes 8 at a time.
bind(big);
- andw(len, len, 7);
-
{
! Label loop, around;
bind(loop);
ldrd(vtmp2, post(src, 8));
! sub(rscratch1, rscratch1, 1);
! zip1(vtmp3, T16B, vtmp2, vtmp1);
st1(vtmp3, T8H, post(dst, 16));
! cbnz(rscratch1, loop);
!
bind(around);
}
// Do the tail of up to 8 bytes.
! sub(src, src, 8);
! add(src, src, len, ext::uxtw, 0);
! ldrd(vtmp2, Address(src));
! sub(dst, dst, 16);
add(dst, dst, len, ext::uxtw, 1);
! zip1(vtmp3, T16B, vtmp2, vtmp1);
! st1(vtmp3, T8H, Address(dst));
bind(done);
}
// Compress char[] array to byte[].
--- 5569,5637 ----
subw(len, len, 1);
bind(tiny);
cbnz(len, loop);
b(done);
}
+ if (SoftwarePrefetchHintDistance >= 0) {
+ bind(to_stub);
+ RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate());
+ assert(stub.target() != NULL, "large_byte_array_inflate stub has not been generated");
+ trampoline_call(stub);
+ b(after_init);
+ }
+
// Unpack the bytes 8 at a time.
bind(big);
{
! Label loop, around, loop_last, loop_start;
!
! if (SoftwarePrefetchHintDistance >= 0) {
! const int large_loop_threshold = (64 + 16)/8;
! ldrd(vtmp2, post(src, 8));
! andw(len, len, 7);
! cmp(tmp4, large_loop_threshold);
! br(GE, to_stub);
! b(loop_start);
bind(loop);
ldrd(vtmp2, post(src, 8));
! bind(loop_start);
! subs(tmp4, tmp4, 1);
! br(EQ, loop_last);
! zip1(vtmp2, T16B, vtmp2, vtmp1);
! ldrd(vtmp3, post(src, 8));
! st1(vtmp2, T8H, post(dst, 16));
! subs(tmp4, tmp4, 1);
! zip1(vtmp3, T16B, vtmp3, vtmp1);
st1(vtmp3, T8H, post(dst, 16));
! br(NE, loop);
! b(around);
! bind(loop_last);
! zip1(vtmp2, T16B, vtmp2, vtmp1);
! st1(vtmp2, T8H, post(dst, 16));
bind(around);
+ cbz(len, done);
+ } else {
+ andw(len, len, 7);
+ bind(loop);
+ ldrd(vtmp2, post(src, 8));
+ sub(tmp4, tmp4, 1);
+ zip1(vtmp3, T16B, vtmp2, vtmp1);
+ st1(vtmp3, T8H, post(dst, 16));
+ cbnz(tmp4, loop);
+ }
}
// Do the tail of up to 8 bytes.
! add(src, src, len);
! ldrd(vtmp3, Address(src, -8));
add(dst, dst, len, ext::uxtw, 1);
! zip1(vtmp3, T16B, vtmp3, vtmp1);
! strq(vtmp3, Address(dst, -16));
bind(done);
}
// Compress char[] array to byte[].
< prev index next >