< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page
rev 52936 : 8215202: AArch64: jtreg test test/jdk/sun/nio/cs/FindEncoderBugs.java fails
Reviewed-by: duke
Contributed-by: nick.gasson@arm.com


5633 #ifndef BUILTIN_SIM
5634       cmp(len, (u1)8); // handle shortest strings first
5635       br(LT, LOOP_1);
5636       cmp(len, (u1)32);
5637       br(LT, NEXT_8);
5638       // The following code uses the SIMD 'uzp1' and 'uzp2' instructions
5639       // to convert chars to bytes
5640       if (SoftwarePrefetchHintDistance >= 0) {
5641         ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5642         subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
5643         br(LE, NEXT_32_START);
5644         b(NEXT_32_PRFM_START);
5645         BIND(NEXT_32_PRFM);
5646           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5647         BIND(NEXT_32_PRFM_START);
5648           prfm(Address(src, SoftwarePrefetchHintDistance));
5649           orr(v4, T16B, Vtmp1, Vtmp2);
5650           orr(v5, T16B, Vtmp3, Vtmp4);
5651           uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
5652           uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);
5653           stpq(Vtmp1, Vtmp3, dst);
5654           uzp2(v5, T16B, v4, v5); // high bytes
5655           umov(tmp2, v5, D, 1);
5656           fmovd(tmp1, v5);
5657           orr(tmp1, tmp1, tmp2);
5658           cbnz(tmp1, LOOP_8);

5659           sub(len, len, 32);
5660           add(dst, dst, 32);
5661           add(src, src, 64);
5662           subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
5663           br(GE, NEXT_32_PRFM);
5664           cmp(len, (u1)32);
5665           br(LT, LOOP_8);
5666         BIND(NEXT_32);
5667           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5668         BIND(NEXT_32_START);
5669       } else {
5670         BIND(NEXT_32);
5671           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5672       }
5673       prfm(Address(src, SoftwarePrefetchHintDistance));
5674       uzp1(v4, T16B, Vtmp1, Vtmp2);
5675       uzp1(v5, T16B, Vtmp3, Vtmp4);
5676       stpq(v4, v5, dst);
5677       orr(Vtmp1, T16B, Vtmp1, Vtmp2);
5678       orr(Vtmp3, T16B, Vtmp3, Vtmp4);
5679       uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
5680       umov(tmp2, Vtmp1, D, 1);
5681       fmovd(tmp1, Vtmp1);
5682       orr(tmp1, tmp1, tmp2);
5683       cbnz(tmp1, LOOP_8);

5684       sub(len, len, 32);
5685       add(dst, dst, 32);
5686       add(src, src, 64);
5687       cmp(len, (u1)32);
5688       br(GE, NEXT_32);
5689       cbz(len, DONE);
5690 
5691     BIND(LOOP_8);
5692       cmp(len, (u1)8);
5693       br(LT, LOOP_1);
5694     BIND(NEXT_8);
5695       ld1(Vtmp1, T8H, src);
5696       uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes
5697       uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes
5698       strd(Vtmp2, dst);
5699       fmovd(tmp1, Vtmp3);
5700       cbnz(tmp1, NEXT_1);

5701 
5702       sub(len, len, 8);
5703       add(dst, dst, 8);
5704       add(src, src, 16);
5705       cmp(len, (u1)8);
5706       br(GE, NEXT_8);
5707 
5708     BIND(LOOP_1);
5709 #endif
5710     cbz(len, DONE);
5711     BIND(NEXT_1);
5712       ldrh(tmp1, Address(post(src, 2)));
5713       strb(tmp1, Address(post(dst, 1)));
5714       tst(tmp1, 0xff00);
5715       br(NE, SET_RESULT);

5716       subs(len, len, 1);
5717       br(GT, NEXT_1);
5718 
5719     BIND(SET_RESULT);
5720       sub(result, result, len); // Return index where we stopped
5721                                 // (len == 0 here when all characters
5722                                 // were processed, so result is unchanged)
5723     BIND(DONE);
5724 }
5725 
5726 
5727 // Inflate byte[] array to char[].
5728 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
5729                                         FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
5730                                         Register tmp4) {
5731   Label big, done, after_init, to_stub;
5732 
5733   assert_different_registers(src, dst, len, tmp4, rscratch1);
5734 
5735   fmovd(vtmp1, zr);




5633 #ifndef BUILTIN_SIM
5634       cmp(len, (u1)8); // handle shortest strings first
5635       br(LT, LOOP_1);
5636       cmp(len, (u1)32);
5637       br(LT, NEXT_8);
5638       // The following code uses the SIMD 'uzp1' and 'uzp2' instructions
5639       // to convert chars to bytes
5640       if (SoftwarePrefetchHintDistance >= 0) {
5641         ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5642         subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
5643         br(LE, NEXT_32_START);
5644         b(NEXT_32_PRFM_START);
5645         BIND(NEXT_32_PRFM);
5646           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5647         BIND(NEXT_32_PRFM_START);
5648           prfm(Address(src, SoftwarePrefetchHintDistance));
5649           orr(v4, T16B, Vtmp1, Vtmp2);
5650           orr(v5, T16B, Vtmp3, Vtmp4);
5651           uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
5652           uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);

5653           uzp2(v5, T16B, v4, v5); // high bytes
5654           umov(tmp2, v5, D, 1);
5655           fmovd(tmp1, v5);
5656           orr(tmp1, tmp1, tmp2);
5657           cbnz(tmp1, LOOP_8);
5658           stpq(Vtmp1, Vtmp3, dst);
5659           sub(len, len, 32);
5660           add(dst, dst, 32);
5661           add(src, src, 64);
5662           subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
5663           br(GE, NEXT_32_PRFM);
5664           cmp(len, (u1)32);
5665           br(LT, LOOP_8);
5666         BIND(NEXT_32);
5667           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5668         BIND(NEXT_32_START);
5669       } else {
5670         BIND(NEXT_32);
5671           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
5672       }
5673       prfm(Address(src, SoftwarePrefetchHintDistance));
5674       uzp1(v4, T16B, Vtmp1, Vtmp2);
5675       uzp1(v5, T16B, Vtmp3, Vtmp4);

5676       orr(Vtmp1, T16B, Vtmp1, Vtmp2);
5677       orr(Vtmp3, T16B, Vtmp3, Vtmp4);
5678       uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
5679       umov(tmp2, Vtmp1, D, 1);
5680       fmovd(tmp1, Vtmp1);
5681       orr(tmp1, tmp1, tmp2);
5682       cbnz(tmp1, LOOP_8);
5683       stpq(v4, v5, dst);
5684       sub(len, len, 32);
5685       add(dst, dst, 32);
5686       add(src, src, 64);
5687       cmp(len, (u1)32);
5688       br(GE, NEXT_32);
5689       cbz(len, DONE);
5690 
5691     BIND(LOOP_8);
5692       cmp(len, (u1)8);
5693       br(LT, LOOP_1);
5694     BIND(NEXT_8);
5695       ld1(Vtmp1, T8H, src);
5696       uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes
5697       uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes

5698       fmovd(tmp1, Vtmp3);
5699       cbnz(tmp1, NEXT_1);
5700       strd(Vtmp2, dst);
5701 
5702       sub(len, len, 8);
5703       add(dst, dst, 8);
5704       add(src, src, 16);
5705       cmp(len, (u1)8);
5706       br(GE, NEXT_8);
5707 
5708     BIND(LOOP_1);
5709 #endif
5710     cbz(len, DONE);
5711     BIND(NEXT_1);
5712       ldrh(tmp1, Address(post(src, 2)));

5713       tst(tmp1, 0xff00);
5714       br(NE, SET_RESULT);
5715       strb(tmp1, Address(post(dst, 1)));
5716       subs(len, len, 1);
5717       br(GT, NEXT_1);
5718 
5719     BIND(SET_RESULT);
5720       sub(result, result, len); // Return index where we stopped
5721                                 // (len == 0 here when all characters
5722                                 // were processed, so result is unchanged)
5723     BIND(DONE);
5724 }
5725 
5726 
5727 // Inflate byte[] array to char[].
5728 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
5729                                         FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
5730                                         Register tmp4) {
5731   Label big, done, after_init, to_stub;
5732 
5733   assert_different_registers(src, dst, len, tmp4, rscratch1);
5734 
5735   fmovd(vtmp1, zr);


< prev index next >