5523 #endif
5524   BIND(NEXT_1);
5525     ldrh(tmp1, Address(post(src, 2)));
5526     tst(tmp1, 0xff00);
5527     br(NE, DONE);
5528     strb(tmp1, Address(post(dst, 1)));
5529     subs(len, len, 1);
5530     br(GT, NEXT_1);
5531
5532   BIND(DONE);
5533     sub(result, result, len); // Return index where we stopped
5534                               // Return len == 0 if we processed all
5535                               // characters
5536 }
5537
5538
5539 // Inflate byte[] array to char[].
     //
     // Emits code that reads bytes starting at src and writes one 16-bit
     // element per byte starting at dst.  Widening is done by zip1-ing the
     // loaded bytes with the zeroed register vtmp1, so every source byte is
     // followed by a zero byte in the result.
     //
     //   src, dst     - source / destination addresses; both are advanced by
     //                  the post-indexed loads and stores below
     //   len          - number of bytes to inflate; treated as a 32-bit value
     //                  and overwritten
     //   vtmp1        - set to zero here and used as the zero operand of zip1
     //   vtmp2, vtmp3 - vector scratch registers
     //   tmp4         - scratch for the byte-at-a-time loop
     //
     // rscratch1 is overwritten as well (it counts the 8-byte groups), which
     // is why it takes part in the assert_different_registers check.
5540 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
5541                                         FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
5542                                         Register tmp4) {
5543   Label big, done;
5544
5545   assert_different_registers(src, dst, len, tmp4, rscratch1);
5546
       // vtmp1 <- 0; rscratch1 <- len / 8 (number of whole 8-byte groups).
5547   fmovd(vtmp1 , zr);
5548   lsrw(rscratch1, len, 3);
5549
       // At least one whole group: take the vector loop at 'big'.
5550   cbnzw(rscratch1, big);
5551
5552   // Short string: less than 8 bytes.
5553   {
         // NOTE(review): 'around' is bound below but nothing visible in this
         // function branches to it.
5554     Label loop, around, tiny;
5555
         // len <- len - 4 (setting flags), then len <- len & 3.  For an
         // original len in 0..7 the masked value is exactly the number of
         // bytes still to do after the optional 4-byte step: len itself when
         // len < 4, len - 4 otherwise.  The branch consumes the flags left by
         // subsw (andw is the non-flag-setting form), so LO is taken when the
         // original len was below 4.
5556     subsw(len, len, 4);
5557     andw(len, len, 3);
5558     br(LO, tiny);
5559
5560     // Use SIMD to do 4 bytes.
         // Load 4 bytes, interleave them with zeros, store the resulting 8 bytes.
5561     ldrs(vtmp2, post(src, 4));
5562     zip1(vtmp3, T8B, vtmp2, vtmp1);
5563     strd(vtmp3, post(dst, 8));
5564
5565     cbzw(len, done);
5566
5567     // Do the remaining bytes by steam.
         // One byte in, one halfword out, per iteration.
5568     bind(loop);
5569     ldrb(tmp4, post(src, 1));
5570     strh(tmp4, post(dst, 2));
5571     subw(len, len, 1);
5572
         // Loop test; also the entry point for inputs shorter than 4 bytes.
5573     bind(tiny);
5574     cbnz(len, loop);
5575
5576     bind(around);
5577     b(done);
5578   }
5579
5580   // Unpack the bytes 8 at a time.
5581   bind(big);
       // len <- len & 7: the number of bytes left over after the whole groups.
5582   andw(len, len, 7);
5583
5584   {
         // NOTE(review): 'around' is bound below but not referenced here either.
5585     Label loop, around;
5586
         // Each iteration turns 8 source bytes into 16 destination bytes;
         // rscratch1 counts the remaining groups down to zero.
5587     bind(loop);
5588     ldrd(vtmp2, post(src, 8));
5589     sub(rscratch1, rscratch1, 1);
5590     zip1(vtmp3, T16B, vtmp2, vtmp1);
5591     st1(vtmp3, T8H, post(dst, 16));
5592     cbnz(rscratch1, loop);
5593
5594     bind(around);
5595   }
5596
5597   // Do the tail of up to 8 bytes.
       // Instead of a scalar loop, move src to 8 bytes before the end of the
       // input and dst to 16 bytes before the end of the output, then convert
       // one more full group.  This overlaps elements the loop above already
       // wrote.  At least one group was processed on this path, so the
       // backward step does not reach before the start of either array.
       // There is no test for len == 0 here: in that case the final group is
       // simply converted a second time.
5598   sub(src, src, 8);
5599   add(src, src, len, ext::uxtw, 0);
5600   ldrd(vtmp2, Address(src));
5601   sub(dst, dst, 16);
5602   add(dst, dst, len, ext::uxtw, 1);
5603   zip1(vtmp3, T16B, vtmp2, vtmp1);
5604   st1(vtmp3, T8H, Address(dst));
5605
5606   bind(done);
5607 }
5608
5609 // Compress char[] array to byte[].
     //
     // Delegates to encode_iso_array and then adjusts result: result is left
     // as produced only when len is zero afterwards; otherwise it is replaced
     // by zero.  (The comment at the end of encode_iso_array states that
     // len == 0 means all characters were processed.)
5610 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
5611                                          FloatRegister tmp1Reg, FloatRegister tmp2Reg,
5612                                          FloatRegister tmp3Reg, FloatRegister tmp4Reg,
5613                                          Register result) {
5614   encode_iso_array(src, dst, len, result,
5615                    tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
       // result <- (len == 0) ? result : 0
5616   cmp(len, zr);
5617   csel(result, result, zr, EQ);
5618 }
5619
5620 // get_thread() can be called anywhere inside generated code so we
5621 // need to save whatever non-callee save context might get clobbered
5622 // by the call to JavaThread::aarch64_get_thread_helper() or, indeed,
5623 // the call setup code.
5624 //
|
     // The listing numbers restart at 5523 below: this is a second rendering
     // of the same range, in which byte_array_inflate has a different body.
5523 #endif
5524   BIND(NEXT_1);
5525     ldrh(tmp1, Address(post(src, 2)));
5526     tst(tmp1, 0xff00);
5527     br(NE, DONE);
5528     strb(tmp1, Address(post(dst, 1)));
5529     subs(len, len, 1);
5530     br(GT, NEXT_1);
5531
5532   BIND(DONE);
5533     sub(result, result, len); // Return index where we stopped
5534                               // Return len == 0 if we processed all
5535                               // characters
5536 }
5537
5538
5539 // Inflate byte[] array to char[].
     //
     // Emits code that reads bytes starting at src and writes one 16-bit
     // element per byte starting at dst.  Widening is done by zip1-ing the
     // loaded bytes with the zeroed register vtmp1.
     //
     //   src, dst     - source / destination addresses; both are advanced by
     //                  the post-indexed loads and stores below
     //   len          - number of bytes to inflate; overwritten
     //   vtmp1        - set to zero here and used as the zero operand of zip1
     //   vtmp2, vtmp3 - vector scratch registers
     //   tmp4         - holds the count of 8-byte groups (len / 8) and is also
     //                  the scratch register of the byte-at-a-time loop
     //
     // When SoftwarePrefetchHintDistance >= 0, inputs of at least
     // large_loop_threshold groups are handed to the
     // large_byte_array_inflate stub, and the remaining groups are processed
     // two per iteration.
     // NOTE(review): rscratch1 is still listed in assert_different_registers
     // although this body no longer names it - presumably the stub or the
     // trampoline call may use it; confirm.
5540 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
5541                                         FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
5542                                         Register tmp4) {
5543   Label big, done, after_init, to_stub;
5544
5545   assert_different_registers(src, dst, len, tmp4, rscratch1);
5546
       // vtmp1 <- 0; tmp4 <- len / 8 (number of whole 8-byte groups).
5547   fmovd(vtmp1, zr);
5548   lsrw(tmp4, len, 3);
       // Re-entry point used after the stub call further down.
5549   bind(after_init);
5550   cbnzw(tmp4, big);
5551   // Short string: less than 8 bytes.
5552   {
5553     Label loop, tiny;
5554
         // Fewer than 4 bytes: go straight to the byte loop with len unchanged.
5555     cmpw(len, 4);
5556     br(LT, tiny);
5557     // Use SIMD to do 4 bytes.
         // Load 4 bytes, interleave them with zeros, store the resulting 8
         // bytes, and account for them in len.
5558     ldrs(vtmp2, post(src, 4));
5559     zip1(vtmp3, T8B, vtmp2, vtmp1);
5560     subw(len, len, 4);
5561     strd(vtmp3, post(dst, 8));
5562
5563     cbzw(len, done);
5564
5565     // Do the remaining bytes by steam.
         // One byte in, one halfword out, per iteration.
5566     bind(loop);
5567     ldrb(tmp4, post(src, 1));
5568     strh(tmp4, post(dst, 2));
5569     subw(len, len, 1);
5570
         // Loop test; also the entry point for inputs shorter than 4 bytes.
         // NOTE(review): this is cbnz, not the w-form used on len elsewhere,
         // and on the direct path from br(LT, tiny) len has not been
         // rewritten by a 32-bit operation - presumably callers guarantee the
         // upper half of len is clear; confirm.
5571     bind(tiny);
5572     cbnz(len, loop);
5573
5574     b(done);
5575   }
5576
       // Out-of-line call to the stub for large inputs.  It is reached only
       // from the br(GE, to_stub) below, i.e. after the first group has
       // already been loaded into vtmp2, src has been advanced by 8 and len
       // has been masked to len & 7.
       // NOTE(review): the stub's register contract is not visible here;
       // presumably it expects that state and leaves tmp4/src/dst set up for
       // the re-dispatch at after_init - verify against the stub generator.
       // NOTE(review): any result of trampoline_call is not examined here -
       // confirm whether it can report failure.
5577   if (SoftwarePrefetchHintDistance >= 0) {
5578     bind(to_stub);
5579       RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate());
5580       assert(stub.target() != NULL, "large_byte_array_inflate stub has not been generated");
5581       trampoline_call(stub);
5582       b(after_init);
5583   }
5584
5585   // Unpack the bytes 8 at a time.
5586   bind(big);
5587   {
5588     Label loop, around, loop_last, loop_start;
5589
5590     if (SoftwarePrefetchHintDistance >= 0) {
           // Threshold in 8-byte groups: (64 + 16) / 8 = 10 groups = 80 bytes.
5591       const int large_loop_threshold = (64 + 16)/8;
           // Preload the first group, reduce len to the leftover byte count
           // (len & 7), and divert large inputs to the stub.
5592       ldrd(vtmp2, post(src, 8));
5593       andw(len, len, 7);
5594       cmp(tmp4, large_loop_threshold);
5595       br(GE, to_stub);
5596       b(loop_start);
5597
           // Two groups per iteration.  Whenever control reaches loop_start,
           // vtmp2 holds a group that has been loaded but not yet stored.
5598       bind(loop);
5599       ldrd(vtmp2, post(src, 8));
5600       bind(loop_start);
           // If the pending group is the last one, finish it at loop_last.
5601       subs(tmp4, tmp4, 1);
5602       br(EQ, loop_last);
           // Otherwise widen and store the pending group while loading,
           // widening and storing the next one.  The flags tested by
           // br(NE, loop) are those of the second subs.
5603       zip1(vtmp2, T16B, vtmp2, vtmp1);
5604       ldrd(vtmp3, post(src, 8));
5605       st1(vtmp2, T8H, post(dst, 16));
5606       subs(tmp4, tmp4, 1);
5607       zip1(vtmp3, T16B, vtmp3, vtmp1);
5608       st1(vtmp3, T8H, post(dst, 16));
5609       br(NE, loop);
5610       b(around);
           // Odd group out: widen and store the pending group.
5611       bind(loop_last);
5612       zip1(vtmp2, T16B, vtmp2, vtmp1);
5613       st1(vtmp2, T8H, post(dst, 16));
5614       bind(around);
           // No leftover bytes: skip the tail.
5615       cbz(len, done);
5616     } else {
           // Plain loop: one group (8 bytes in, 16 bytes out) per iteration,
           // tmp4 counting the remaining groups down to zero.
5617       andw(len, len, 7);
5618       bind(loop);
5619       ldrd(vtmp2, post(src, 8));
5620       sub(tmp4, tmp4, 1);
5621       zip1(vtmp3, T16B, vtmp2, vtmp1);
5622       st1(vtmp3, T8H, post(dst, 16));
5623       cbnz(tmp4, loop);
5624     }
5625   }
5626
5627   // Do the tail of up to 8 bytes.
       // Advance src and dst to the end of the input / output (len bytes and
       // 2 * len bytes respectively), then convert the final 8 source bytes
       // into the final 16 destination bytes using negative offsets.  This
       // overlaps elements already written above.  In the else-branch
       // configuration there is no len == 0 test, so the final group may be
       // converted a second time.
5628   add(src, src, len);
5629   ldrd(vtmp3, Address(src, -8));
5630   add(dst, dst, len, ext::uxtw, 1);
5631   zip1(vtmp3, T16B, vtmp3, vtmp1);
5632   strq(vtmp3, Address(dst, -16));
5633
5634   bind(done);
5635 }
5636
5637 // Compress char[] array to byte[].
     //
     // Delegates to encode_iso_array and then adjusts result: result is left
     // as produced only when len is zero afterwards; otherwise it is replaced
     // by zero.  (The comment at the end of encode_iso_array states that
     // len == 0 means all characters were processed.)
5638 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
5639                                          FloatRegister tmp1Reg, FloatRegister tmp2Reg,
5640                                          FloatRegister tmp3Reg, FloatRegister tmp4Reg,
5641                                          Register result) {
5642   encode_iso_array(src, dst, len, result,
5643                    tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
       // result <- (len == 0) ? result : 0
5644   cmp(len, zr);
5645   csel(result, result, zr, EQ);
5646 }
5647
5648 // get_thread() can be called anywhere inside generated code so we
5649 // need to save whatever non-callee save context might get clobbered
5650 // by the call to JavaThread::aarch64_get_thread_helper() or, indeed,
5651 // the call setup code.
5652 //
|