< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page




5523 #endif
5524     BIND(NEXT_1);
5525       ldrh(tmp1, Address(post(src, 2)));
5526       tst(tmp1, 0xff00);
5527       br(NE, DONE);
5528       strb(tmp1, Address(post(dst, 1)));
5529       subs(len, len, 1);
5530       br(GT, NEXT_1);
5531 
5532     BIND(DONE);
5533       sub(result, result, len); // Return index where we stopped
5534                                 // Return len == 0 if we processed all
5535                                 // characters
5536 }
5537 
5538 
5539 // Inflate byte[] array to char[].
     //
     // Emits code that reads `len` bytes starting at `src` and writes each one
     // as a 16-bit value starting at `dst`, by interleaving the source bytes
     // with zero bytes (zip1 against a zeroed vector register).
     //
     //   src, dst  - source / destination addresses; both are advanced by the
     //               emitted code.
     //   len       - byte count on entry; overwritten with remainder counts.
     //   vtmp1     - zeroed here and used as the "zero byte" operand of zip1.
     //   vtmp2/3   - scratch vector registers for the loaded / widened data.
     //   tmp4      - scratch general register for the byte-at-a-time loop.
     // rscratch1 is also overwritten (it holds the 8-byte group counter).
5540 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
5541                                         FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
5542                                         Register tmp4) {
5543   Label big, done;
5544 
5545   assert_different_registers(src, dst, len, tmp4, rscratch1);
5546 
       // vtmp1 = 0; rscratch1 = len / 8 = number of complete 8-byte groups.
5547   fmovd(vtmp1 , zr);
5548   lsrw(rscratch1, len, 3);
5549 
       // At least one complete group: take the 8-bytes-at-a-time path.
5550   cbnzw(rscratch1, big);
5551 
5552   // Short string: less than 8 bytes.
5553   {
5554     Label loop, around, tiny;
5555 
         // subsw sets the flags for (len - 4); andw does not touch them, so the
         // br(LO) below still tests "len < 4" (unsigned borrow). Because len < 8
         // here, (len - 4) & 3 equals len % 4 whether or not the subtraction
         // wrapped, i.e. the number of bytes left for the scalar loop.
5556     subsw(len, len, 4);
5557     andw(len, len, 3);
5558     br(LO, tiny);
5559 


5560     // Use SIMD to do 4 bytes.
         // Load 4 bytes, interleave with zeros to get 4 halfwords, store 8 bytes.
5561     ldrs(vtmp2, post(src, 4));
5562     zip1(vtmp3, T8B, vtmp2, vtmp1);

5563     strd(vtmp3, post(dst, 8));
5564 
5565     cbzw(len, done);
5566 
5567     // Do the remaining bytes one at a time.
5568     bind(loop);
5569     ldrb(tmp4, post(src, 1));
5570     strh(tmp4, post(dst, 2));
5571     subw(len, len, 1);
5572 
         // Entry point for inputs shorter than 4 bytes; also the loop test.
5573     bind(tiny);
5574     cbnz(len, loop);
5575 
         // Note: `around` is bound but no branch in this block targets it.
5576     bind(around);
5577     b(done);
5578   }
5579 








5580   // Unpack the bytes 8 at a time.
5581   bind(big);
       // len = number of bytes left over after the complete 8-byte groups.
5582   andw(len, len, 7);
5583 
5584   {
5585     Label loop, around;








5586 
         // Each iteration: load 8 bytes, interleave with zeros into 8 halfwords,
         // store 16 bytes; rscratch1 counts the remaining groups down to zero.
5587     bind(loop);
5588     ldrd(vtmp2, post(src, 8));
5589     sub(rscratch1, rscratch1, 1);
5590     zip1(vtmp3, T16B, vtmp2, vtmp1);






5591     st1(vtmp3, T8H, post(dst, 16));
5592     cbnz(rscratch1, loop);
5593 



         // Note: `around` is bound but no branch in this block targets it.
5594     bind(around);










5595   }
5596 
5597   // Do the tail of up to 8 bytes.
       // Step src back one group and forward by the remainder so that the final
       // 8-byte load ends exactly at the end of the input; dst is adjusted the
       // same way with the remainder scaled by 2. The load/store overlaps data
       // already converted by the loop (when len == 0 it simply redoes the last
       // group), which avoids a separate scalar tail loop on this path.
5598   sub(src, src, 8);
5599   add(src, src, len, ext::uxtw, 0);
5600   ldrd(vtmp2, Address(src));
5601   sub(dst, dst, 16);
5602   add(dst, dst, len, ext::uxtw, 1);
5603   zip1(vtmp3, T16B, vtmp2, vtmp1);
5604   st1(vtmp3, T8H, Address(dst));
5605 
5606   bind(done);
5607 }
5608 
5609 // Compress char[] array to byte[].
     //
     // Delegates the copy to encode_iso_array(src, dst, len, result, tmp1..4)
     // and then post-processes `result`: it is kept only if `len` is zero after
     // the encode, otherwise it is forced to zero.
5610 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
5611                                          FloatRegister tmp1Reg, FloatRegister tmp2Reg,
5612                                          FloatRegister tmp3Reg, FloatRegister tmp4Reg,
5613                                          Register result) {
5614   encode_iso_array(src, dst, len, result,
5615                    tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
       // encode_iso_array's closing comment states that len == 0 means every
       // character was processed. result = (len == 0) ? result : 0.
5616   cmp(len, zr);
5617   csel(result, result, zr, EQ);
5618 }
5619 
5620 // get_thread() can be called anywhere inside generated code so we
5621 // need to save whatever non-callee save context might get clobbered
5622 // by the call to JavaThread::aarch64_get_thread_helper() or, indeed,
5623 // the call setup code.
5624 //


5523 #endif
5524     BIND(NEXT_1);
5525       ldrh(tmp1, Address(post(src, 2)));
5526       tst(tmp1, 0xff00);
5527       br(NE, DONE);
5528       strb(tmp1, Address(post(dst, 1)));
5529       subs(len, len, 1);
5530       br(GT, NEXT_1);
5531 
5532     BIND(DONE);
5533       sub(result, result, len); // Return index where we stopped
5534                                 // Return len == 0 if we processed all
5535                                 // characters
5536 }
5537 
5538 
5539 // Inflate byte[] array to char[].
     //
     // Emits code that reads `len` bytes starting at `src` and writes each one
     // as a 16-bit value starting at `dst`, by interleaving the source bytes
     // with zero bytes (zip1 against a zeroed vector register).
     //
     //   src, dst  - source / destination addresses; both are advanced by the
     //               emitted code.
     //   len       - byte count on entry; overwritten with remainder counts.
     //   vtmp1     - zeroed here and used as the "zero byte" operand of zip1.
     //   vtmp2/3   - scratch vector registers for the loaded / widened data.
     //   tmp4      - holds the 8-byte group counter, and is reused as the
     //               scalar temporary in the byte-at-a-time loop.
     //
     // When SoftwarePrefetchHintDistance >= 0, inputs of at least
     // large_loop_threshold groups are handed to the
     // StubRoutines::aarch64::large_byte_array_inflate() stub.
5540 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
5541                                         FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
5542                                         Register tmp4) {
5543   Label big, done, after_init, to_stub;
5544 
5545   assert_different_registers(src, dst, len, tmp4, rscratch1);
5546 
       // vtmp1 = 0; tmp4 = len / 8 = number of complete 8-byte groups.
5547   fmovd(vtmp1, zr);
5548   lsrw(tmp4, len, 3);
       // The stub path branches back here after the call, so the dispatch on
       // tmp4 is re-evaluated with whatever the stub left in the registers.
       // NOTE(review): the stub's register contract is not visible in this
       // file section - confirm against the stub generator.
5549   bind(after_init);
5550   cbnzw(tmp4, big);

5551   // Short string: less than 8 bytes.
5552   {
5553     Label loop, tiny;




5554 
         // Fewer than 4 bytes: go straight to the scalar loop test.
5555     cmpw(len, 4);
5556     br(LT, tiny);
5557     // Use SIMD to do 4 bytes.
         // Load 4 bytes, interleave with zeros to get 4 halfwords, store 8 bytes.
5558     ldrs(vtmp2, post(src, 4));
5559     zip1(vtmp3, T8B, vtmp2, vtmp1);
5560     subw(len, len, 4);
5561     strd(vtmp3, post(dst, 8));
5562 
5563     cbzw(len, done);
5564 
5565     // Do the remaining bytes one at a time.
5566     bind(loop);
5567     ldrb(tmp4, post(src, 1));
5568     strh(tmp4, post(dst, 2));
5569     subw(len, len, 1);
5570 
         // Entry point for inputs shorter than 4 bytes; also the loop test.
5571     bind(tiny);
5572     cbnz(len, loop);
5573 

5574     b(done);
5575   }
5576 
       // Out-of-line call to the large-array stub. The preceding code ends with
       // an unconditional b(done), so this is reached only via br(GE, to_stub)
       // below. It is emitted (and to_stub bound) only when prefetching is on.
5577   if (SoftwarePrefetchHintDistance >= 0) {
5578     bind(to_stub);
5579       RuntimeAddress stub =  RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate());
5580       assert(stub.target() != NULL, "large_byte_array_inflate stub has not been generated");
5581       trampoline_call(stub);
5582       b(after_init);
5583   }
5584 
5585   // Unpack the bytes 8 at a time.
5586   bind(big);


5587   {
5588     Label loop, around, loop_last, loop_start;
5589 
5590     if (SoftwarePrefetchHintDistance >= 0) {
           // Threshold in 8-byte groups: (64 + 16) / 8 = 10 groups = 80 bytes.
5591       const int large_loop_threshold = (64 + 16)/8;
           // The first group is loaded (and src advanced) and len is reduced to
           // the remainder before the threshold test, so both the inline loop
           // and the stub are entered in that state.
           // NOTE(review): presumably the stub expects exactly this - confirm.
5592       ldrd(vtmp2, post(src, 8));
5593       andw(len, len, 7);
5594       cmp(tmp4, large_loop_threshold);
5595       br(GE, to_stub);
5596       b(loop_start);
5597 
           // Loop unrolled to two groups per iteration. vtmp2 carries the group
           // loaded at the top (or pre-loaded above on first entry).
5598       bind(loop);
5599       ldrd(vtmp2, post(src, 8));
5600       bind(loop_start);
           // If the group in vtmp2 is the last one, finish it at loop_last.
5601       subs(tmp4, tmp4, 1);
5602       br(EQ, loop_last);
5603       zip1(vtmp2, T16B, vtmp2, vtmp1);
5604       ldrd(vtmp3, post(src, 8));
5605       st1(vtmp2, T8H, post(dst, 16));
           // Flags set here are consumed by br(NE, loop) below; the intervening
           // zip1/st1 do not modify them.
5606       subs(tmp4, tmp4, 1);
5607       zip1(vtmp3, T16B, vtmp3, vtmp1);
5608       st1(vtmp3, T8H, post(dst, 16));
5609       br(NE, loop);
5610       b(around);
           // Odd group count: widen and store the final already-loaded group.
5611       bind(loop_last);
5612       zip1(vtmp2, T16B, vtmp2, vtmp1);
5613       st1(vtmp2, T8H, post(dst, 16));
5614       bind(around);
           // No leftover bytes: skip the overlapping tail below.
5615       cbz(len, done);
5616     } else {
           // Simple loop: one 8-byte group per iteration, tmp4 counts down.
5617       andw(len, len, 7);
5618       bind(loop);
5619       ldrd(vtmp2, post(src, 8));
5620       sub(tmp4, tmp4, 1);
5621       zip1(vtmp3, T16B, vtmp2, vtmp1);
5622       st1(vtmp3, T8H, post(dst, 16));
5623       cbnz(tmp4, loop);
5624     }
5625   }
5626 
5627   // Do the tail of up to 8 bytes.
       // Advance src by the remainder and load the 8 bytes that end there;
       // advance dst by twice the remainder and store the 16 bytes that end
       // there. The access overlaps data already converted by the loop, which
       // avoids a separate scalar tail loop on this path.
5628   add(src, src, len);
5629   ldrd(vtmp3, Address(src, -8));


5630   add(dst, dst, len, ext::uxtw, 1);
5631   zip1(vtmp3, T16B, vtmp3, vtmp1);
5632   strq(vtmp3, Address(dst, -16));
5633 
5634   bind(done);
5635 }
5636 
5637 // Compress char[] array to byte[].
     //
     // Delegates the copy to encode_iso_array(src, dst, len, result, tmp1..4)
     // and then post-processes `result`: it is kept only if `len` is zero after
     // the encode, otherwise it is forced to zero.
5638 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
5639                                          FloatRegister tmp1Reg, FloatRegister tmp2Reg,
5640                                          FloatRegister tmp3Reg, FloatRegister tmp4Reg,
5641                                          Register result) {
5642   encode_iso_array(src, dst, len, result,
5643                    tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
       // encode_iso_array's closing comment states that len == 0 means every
       // character was processed. result = (len == 0) ? result : 0.
5644   cmp(len, zr);
5645   csel(result, result, zr, EQ);
5646 }
5647 
5648 // get_thread() can be called anywhere inside generated code so we
5649 // need to save whatever non-callee save context might get clobbered
5650 // by the call to JavaThread::aarch64_get_thread_helper() or, indeed,
5651 // the call setup code.
5652 //
< prev index next >