< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page

        

@@ -5538,30 +5538,28 @@
 
 // Inflate byte[] array to char[].
 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
                                         FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
                                         Register tmp4) {
-  Label big, done;
+  Label big, done, after_init, to_stub; // after_init: re-dispatch point after the stub call; to_stub: out-of-line stub call
 
   assert_different_registers(src, dst, len, tmp4, rscratch1);
 
-  fmovd(vtmp1 , zr);
-  lsrw(rscratch1, len, 3);
-
-  cbnzw(rscratch1, big);
-
+  fmovd(vtmp1, zr); // vtmp1 = 0; zip1 below interleaves it with the source bytes to widen them
+  lsrw(tmp4, len, 3); // tmp4 = len / 8, the number of whole 8-byte groups
+  bind(after_init); // the to_stub path branches back here to re-test tmp4
+  cbnzw(tmp4, big); // at least one whole 8-byte group: take the 8-at-a-time path
   // Short string: less than 8 bytes.
   {
-    Label loop, around, tiny;
-
-    subsw(len, len, 4);
-    andw(len, len, 3);
-    br(LO, tiny);
+    Label loop, tiny;
 
+    cmpw(len, 4);
+    br(LT, tiny); // fewer than 4 bytes: skip the 4-byte SIMD step
     // Use SIMD to do 4 bytes.
     ldrs(vtmp2, post(src, 4));
     zip1(vtmp3, T8B, vtmp2, vtmp1);
+    subw(len, len, 4); // account for the 4 bytes handled by the SIMD step
     strd(vtmp3, post(dst, 8));
 
     cbzw(len, done);
 
     // Do the remaining bytes by steam.

@@ -5571,39 +5569,69 @@
     subw(len, len, 1);
 
     bind(tiny);
     cbnz(len, loop);
 
-    bind(around);
     b(done);
   }
 
+  if (SoftwarePrefetchHintDistance >= 0) { // out-of-line code: follows b(done), so it is entered only via br(GE, to_stub)
+    bind(to_stub);
+      RuntimeAddress stub =  RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate());
+      assert(stub.target() != NULL, "large_byte_array_inflate stub has not been generated");
+      trampoline_call(stub); // NOTE(review): which registers the stub consumes/updates is not visible here - confirm against the stub
+      b(after_init); // re-dispatch on tmp4 once the stub returns
+  }
+
   // Unpack the bytes 8 at a time.
   bind(big);
-  andw(len, len, 7);
-
   {
-    Label loop, around;
+    Label loop, around, loop_last, loop_start;
+
+    if (SoftwarePrefetchHintDistance >= 0) {
+      const int large_loop_threshold = (64 + 16)/8; // 10 eight-byte groups (80 bytes); NOTE(review): rationale for 64 + 16 is not visible here
+      ldrd(vtmp2, post(src, 8)); // preload the first 8 bytes; this runs before the threshold test, so also on the to_stub path
+      andw(len, len, 7); // len = number of tail bytes (len % 8)
+      cmp(tmp4, large_loop_threshold);
+      br(GE, to_stub); // group count at or above the threshold: hand over to the stub
+      b(loop_start); // first group is already in vtmp2, so skip the load at loop
 
     bind(loop);
     ldrd(vtmp2, post(src, 8));
-    sub(rscratch1, rscratch1, 1);
-    zip1(vtmp3, T16B, vtmp2, vtmp1);
+      bind(loop_start);
+      subs(tmp4, tmp4, 1);
+      br(EQ, loop_last); // vtmp2 holds the last whole group: finish it at loop_last
+      zip1(vtmp2, T16B, vtmp2, vtmp1); // widen the 8 loaded bytes to 8 halfwords, in place
+      ldrd(vtmp3, post(src, 8)); // load the second group of this iteration
+      st1(vtmp2, T8H, post(dst, 16)); // store 8 chars and advance dst by 16 bytes
+      subs(tmp4, tmp4, 1); // flags are consumed by br(NE, loop) below
+      zip1(vtmp3, T16B, vtmp3, vtmp1);
     st1(vtmp3, T8H, post(dst, 16));
-    cbnz(rscratch1, loop);
-
+      br(NE, loop); // more whole groups remain
+      b(around);
+      bind(loop_last);
+      zip1(vtmp2, T16B, vtmp2, vtmp1);
+      st1(vtmp2, T8H, post(dst, 16));
     bind(around);
+      cbz(len, done); // no tail bytes: skip the overlapping tail copy
+    } else {
+      andw(len, len, 7); // len = number of tail bytes (len % 8)
+      bind(loop); // simple loop: one 8-byte group per iteration
+      ldrd(vtmp2, post(src, 8));
+      sub(tmp4, tmp4, 1);
+      zip1(vtmp3, T16B, vtmp2, vtmp1);
+      st1(vtmp3, T8H, post(dst, 16));
+      cbnz(tmp4, loop); // falls through to the tail code even when len == 0
+    }
   }
 
   // Do the tail of up to 8 bytes.
-  sub(src, src, 8);
-  add(src, src, len, ext::uxtw, 0);
-  ldrd(vtmp2, Address(src));
-  sub(dst, dst, 16);
+  add(src, src, len); // advance src past the tail bytes (len was masked to 0..7 above)
+  ldrd(vtmp3, Address(src, -8)); // load the 8 bytes ending at src; overlaps bytes already inflated
   add(dst, dst, len, ext::uxtw, 1);
-  zip1(vtmp3, T16B, vtmp2, vtmp1);
-  st1(vtmp3, T8H, Address(dst));
+  zip1(vtmp3, T16B, vtmp3, vtmp1);
+  strq(vtmp3, Address(dst, -16)); // store the 8 chars ending at dst
 
   bind(done);
 }
 
 // Compress char[] array to byte[].
< prev index next >