5633 #ifndef BUILTIN_SIM
5634 cmp(len, (u1)8); // handle shortest strings first
5635 br(LT, LOOP_1);
5636 cmp(len, (u1)32);
5637 br(LT, NEXT_8);
5638 // The following code uses the SIMD 'uzp1' and 'uzp2' instructions
5639 // to convert chars to bytes
5640 if (SoftwarePrefetchHintDistance >= 0) {
5641 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5642 subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
5643 br(LE, NEXT_32_START);
5644 b(NEXT_32_PRFM_START);
5645 BIND(NEXT_32_PRFM);
5646 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5647 BIND(NEXT_32_PRFM_START);
5648 prfm(Address(src, SoftwarePrefetchHintDistance));
5649 orr(v4, T16B, Vtmp1, Vtmp2);
5650 orr(v5, T16B, Vtmp3, Vtmp4);
5651 uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
5652 uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);
5653 stpq(Vtmp1, Vtmp3, dst);
5654 uzp2(v5, T16B, v4, v5); // high bytes
5655 umov(tmp2, v5, D, 1);
5656 fmovd(tmp1, v5);
5657 orr(tmp1, tmp1, tmp2);
5658 cbnz(tmp1, LOOP_8);
5659 sub(len, len, 32);
5660 add(dst, dst, 32);
5661 add(src, src, 64);
5662 subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
5663 br(GE, NEXT_32_PRFM);
5664 cmp(len, (u1)32);
5665 br(LT, LOOP_8);
5666 BIND(NEXT_32);
5667 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5668 BIND(NEXT_32_START);
5669 } else {
5670 BIND(NEXT_32);
5671 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5672 }
5673 prfm(Address(src, SoftwarePrefetchHintDistance));
5674 uzp1(v4, T16B, Vtmp1, Vtmp2);
5675 uzp1(v5, T16B, Vtmp3, Vtmp4);
5676 stpq(v4, v5, dst);
5677 orr(Vtmp1, T16B, Vtmp1, Vtmp2);
5678 orr(Vtmp3, T16B, Vtmp3, Vtmp4);
5679 uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
5680 umov(tmp2, Vtmp1, D, 1);
5681 fmovd(tmp1, Vtmp1);
5682 orr(tmp1, tmp1, tmp2);
5683 cbnz(tmp1, LOOP_8);
5684 sub(len, len, 32);
5685 add(dst, dst, 32);
5686 add(src, src, 64);
5687 cmp(len, (u1)32);
5688 br(GE, NEXT_32);
5689 cbz(len, DONE);
5690
5691 BIND(LOOP_8);
5692 cmp(len, (u1)8);
5693 br(LT, LOOP_1);
5694 BIND(NEXT_8);
5695 ld1(Vtmp1, T8H, src);
5696 uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes
5697 uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes
5698 strd(Vtmp2, dst);
5699 fmovd(tmp1, Vtmp3);
5700 cbnz(tmp1, NEXT_1);
5701
5702 sub(len, len, 8);
5703 add(dst, dst, 8);
5704 add(src, src, 16);
5705 cmp(len, (u1)8);
5706 br(GE, NEXT_8);
5707
5708 BIND(LOOP_1);
5709 #endif
5710 cbz(len, DONE);
5711 BIND(NEXT_1);
5712 ldrh(tmp1, Address(post(src, 2)));
5713 strb(tmp1, Address(post(dst, 1)));
5714 tst(tmp1, 0xff00);
5715 br(NE, SET_RESULT);
5716 subs(len, len, 1);
5717 br(GT, NEXT_1);
5718
5719 BIND(SET_RESULT);
5720 sub(result, result, len); // Return index where we stopped
5721 // Return len == 0 if we processed all
5722 // characters
5723 BIND(DONE);
5724 }
5725
5726
5727 // Inflate byte[] array to char[].
5728 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
5729 FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
5730 Register tmp4) {
5731 Label big, done, after_init, to_stub;
5732
5733 assert_different_registers(src, dst, len, tmp4, rscratch1);
5734
5735 fmovd(vtmp1, zr);
|
5633 #ifndef BUILTIN_SIM
5634 cmp(len, (u1)8); // handle shortest strings first
5635 br(LT, LOOP_1);
5636 cmp(len, (u1)32);
5637 br(LT, NEXT_8);
5638 // The following code uses the SIMD 'uzp1' and 'uzp2' instructions
5639 // to convert chars to bytes
5640 if (SoftwarePrefetchHintDistance >= 0) {
5641 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5642 subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
5643 br(LE, NEXT_32_START);
5644 b(NEXT_32_PRFM_START);
5645 BIND(NEXT_32_PRFM);
5646 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5647 BIND(NEXT_32_PRFM_START);
5648 prfm(Address(src, SoftwarePrefetchHintDistance));
5649 orr(v4, T16B, Vtmp1, Vtmp2);
5650 orr(v5, T16B, Vtmp3, Vtmp4);
5651 uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
5652 uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);
5653 uzp2(v5, T16B, v4, v5); // high bytes
5654 umov(tmp2, v5, D, 1);
5655 fmovd(tmp1, v5);
5656 orr(tmp1, tmp1, tmp2);
5657 cbnz(tmp1, LOOP_8);
5658 stpq(Vtmp1, Vtmp3, dst);
5659 sub(len, len, 32);
5660 add(dst, dst, 32);
5661 add(src, src, 64);
5662 subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
5663 br(GE, NEXT_32_PRFM);
5664 cmp(len, (u1)32);
5665 br(LT, LOOP_8);
5666 BIND(NEXT_32);
5667 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5668 BIND(NEXT_32_START);
5669 } else {
5670 BIND(NEXT_32);
5671 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5672 }
5673 prfm(Address(src, SoftwarePrefetchHintDistance));
5674 uzp1(v4, T16B, Vtmp1, Vtmp2);
5675 uzp1(v5, T16B, Vtmp3, Vtmp4);
5676 orr(Vtmp1, T16B, Vtmp1, Vtmp2);
5677 orr(Vtmp3, T16B, Vtmp3, Vtmp4);
5678 uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
5679 umov(tmp2, Vtmp1, D, 1);
5680 fmovd(tmp1, Vtmp1);
5681 orr(tmp1, tmp1, tmp2);
5682 cbnz(tmp1, LOOP_8);
5683 stpq(v4, v5, dst);
5684 sub(len, len, 32);
5685 add(dst, dst, 32);
5686 add(src, src, 64);
5687 cmp(len, (u1)32);
5688 br(GE, NEXT_32);
5689 cbz(len, DONE);
5690
5691 BIND(LOOP_8);
5692 cmp(len, (u1)8);
5693 br(LT, LOOP_1);
5694 BIND(NEXT_8);
5695 ld1(Vtmp1, T8H, src);
5696 uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes
5697 uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes
5698 fmovd(tmp1, Vtmp3);
5699 cbnz(tmp1, NEXT_1);
5700 strd(Vtmp2, dst);
5701
5702 sub(len, len, 8);
5703 add(dst, dst, 8);
5704 add(src, src, 16);
5705 cmp(len, (u1)8);
5706 br(GE, NEXT_8);
5707
5708 BIND(LOOP_1);
5709 #endif
5710 cbz(len, DONE);
5711 BIND(NEXT_1);
5712 ldrh(tmp1, Address(post(src, 2)));
5713 tst(tmp1, 0xff00);
5714 br(NE, SET_RESULT);
5715 strb(tmp1, Address(post(dst, 1)));
5716 subs(len, len, 1);
5717 br(GT, NEXT_1);
5718
5719 BIND(SET_RESULT);
5720 sub(result, result, len); // Return index where we stopped
5721 // Return len == 0 if we processed all
5722 // characters
5723 BIND(DONE);
5724 }
5725
5726
5727 // Inflate byte[] array to char[].
5728 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
5729 FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
5730 Register tmp4) {
5731 Label big, done, after_init, to_stub;
5732
5733 assert_different_registers(src, dst, len, tmp4, rscratch1);
5734
5735 fmovd(vtmp1, zr);
|