< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page

        

*** 5538,5567 **** // Inflate byte[] array to char[]. void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, Register tmp4) { ! Label big, done; assert_different_registers(src, dst, len, tmp4, rscratch1); ! fmovd(vtmp1 , zr); ! lsrw(rscratch1, len, 3); ! ! cbnzw(rscratch1, big); ! // Short string: less than 8 bytes. { ! Label loop, around, tiny; ! ! subsw(len, len, 4); ! andw(len, len, 3); ! br(LO, tiny); // Use SIMD to do 4 bytes. ldrs(vtmp2, post(src, 4)); zip1(vtmp3, T8B, vtmp2, vtmp1); strd(vtmp3, post(dst, 8)); cbzw(len, done); // Do the remaining bytes by steam. --- 5538,5565 ---- // Inflate byte[] array to char[]. void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, Register tmp4) { ! Label big, done, after_init, to_stub; assert_different_registers(src, dst, len, tmp4, rscratch1); ! fmovd(vtmp1, zr); ! lsrw(tmp4, len, 3); ! bind(after_init); ! cbnzw(tmp4, big); // Short string: less than 8 bytes. { ! Label loop, tiny; + cmpw(len, 4); + br(LT, tiny); // Use SIMD to do 4 bytes. ldrs(vtmp2, post(src, 4)); zip1(vtmp3, T8B, vtmp2, vtmp1); + subw(len, len, 4); strd(vtmp3, post(dst, 8)); cbzw(len, done); // Do the remaining bytes by steam.
*** 5571,5609 **** subw(len, len, 1); bind(tiny); cbnz(len, loop); - bind(around); b(done); } // Unpack the bytes 8 at a time. bind(big); - andw(len, len, 7); - { ! Label loop, around; bind(loop); ldrd(vtmp2, post(src, 8)); ! sub(rscratch1, rscratch1, 1); ! zip1(vtmp3, T16B, vtmp2, vtmp1); st1(vtmp3, T8H, post(dst, 16)); ! cbnz(rscratch1, loop); ! bind(around); } // Do the tail of up to 8 bytes. ! sub(src, src, 8); ! add(src, src, len, ext::uxtw, 0); ! ldrd(vtmp2, Address(src)); ! sub(dst, dst, 16); add(dst, dst, len, ext::uxtw, 1); ! zip1(vtmp3, T16B, vtmp2, vtmp1); ! st1(vtmp3, T8H, Address(dst)); bind(done); } // Compress char[] array to byte[]. --- 5569,5637 ---- subw(len, len, 1); bind(tiny); cbnz(len, loop); b(done); } + if (SoftwarePrefetchHintDistance >= 0) { + bind(to_stub); + RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate()); + assert(stub.target() != NULL, "large_byte_array_inflate stub has not been generated"); + trampoline_call(stub); + b(after_init); + } + // Unpack the bytes 8 at a time. bind(big); { ! Label loop, around, loop_last, loop_start; ! ! if (SoftwarePrefetchHintDistance >= 0) { ! const int large_loop_threshold = (64 + 16)/8; ! ldrd(vtmp2, post(src, 8)); ! andw(len, len, 7); ! cmp(tmp4, large_loop_threshold); ! br(GE, to_stub); ! b(loop_start); bind(loop); ldrd(vtmp2, post(src, 8)); ! bind(loop_start); ! subs(tmp4, tmp4, 1); ! br(EQ, loop_last); ! zip1(vtmp2, T16B, vtmp2, vtmp1); ! ldrd(vtmp3, post(src, 8)); ! st1(vtmp2, T8H, post(dst, 16)); ! subs(tmp4, tmp4, 1); ! zip1(vtmp3, T16B, vtmp3, vtmp1); st1(vtmp3, T8H, post(dst, 16)); ! br(NE, loop); ! b(around); ! bind(loop_last); ! zip1(vtmp2, T16B, vtmp2, vtmp1); ! st1(vtmp2, T8H, post(dst, 16)); bind(around); + cbz(len, done); + } else { + andw(len, len, 7); + bind(loop); + ldrd(vtmp2, post(src, 8)); + sub(tmp4, tmp4, 1); + zip1(vtmp3, T16B, vtmp2, vtmp1); + st1(vtmp3, T8H, post(dst, 16)); + cbnz(tmp4, loop); + } } // Do the tail of up to 8 bytes. ! add(src, src, len); ! ldrd(vtmp3, Address(src, -8)); add(dst, dst, len, ext::uxtw, 1); ! zip1(vtmp3, T16B, vtmp3, vtmp1); ! strq(vtmp3, Address(dst, -16)); bind(done); } // Compress char[] array to byte[].
< prev index next >