src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

@@ -3986,10 +3986,132 @@
     __ leave();
     __ ret(lr);
     return entry;
   }
 
+  //                     ALGORITHM DESCRIPTION - LOG()
+  //                     ---------------------
+  //
+  //    x=2^k * mx, mx in [1,2)
+  //
+  //    Get B~1/mx based on the output of the frecpe instruction (B0)
+  //    B = int(B0*2^7+0.5)/2^7
+  //
+  //    Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
+  //
+  //    Result:  k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6), where
+  //             p(r) is a degree 7 polynomial;
+  //             -log(B) is read from the data table (high, low parts);
+  //             the result is formed from high and low parts
+  //
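+  //    Worked example (illustrative, values rounded): for x = 6.0 = 2^2 * 1.5
+  //    we have k = 2 and mx = 1.5. The reciprocal estimate gives B0 ~= 0.666,
+  //    so B = int(B0*2^7+0.5)/2^7 = 85/128 = 0.6640625 and the reduced
+  //    argument is r = B*mx - 1.0 = -0.00390625. The result is then
+  //    log(6.0) = 2*log(2) - log(85/128) + p(r) ~= 1.791759
+  //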
+  // Special cases:
+  // 1. log(NaN) = quiet NaN
+  // 2. log(+INF) = +INF
+  // 3. log(0) = -INF
+  // 4. log(1) = +0
+  // 5. log(x) = NaN if x < -0, including -INF
+  //
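+  // Rough equivalent in C-like pseudo code (an illustrative sketch of the
+  // flow below, not the exact bit manipulation performed by the stub; helper
+  // names are hypothetical):
+  //
+  //   double dlog(double x) {
+  //     long bits = AS_LONG_BITS(x);
+  //     if (x == 1.0 || bits <= 0 || exponent(x) == 0x7FF)
+  //       return corner_case(x);           // special cases 1-5 above
+  //     if (x is denormal)
+  //       x *= 2^128;                      // SPECIAL_CASE: normalize first
+  //     double mx = significand(x);        // mx in [1, 2)
+  //     double B  = round_7_bits(1/mx);    // from the frecpe estimate
+  //     double r  = B*mx - 1.0;            // kept in high and low parts
+  //     return k*log(2) - log(B) + p(r);   // k = unbiased exponent of x
+  //   }
+  //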
+  address generate_dlog() {
+    StubCodeMark mark(this, "StubRoutines", "dlog");
+    __ align(CodeEntryAlignment);
+    address entry = __ pc();
+    Label DONE, CHECK_CORNER_CASES, SPECIAL_CASE, MAIN,
+        CHECKED_CORNER_CASES, RETURN_MINF_OR_NAN;
+    FloatRegister vtmp0 = v0, vtmp1 = v1, vtmp2 = v2, vtmp3 = v3, vtmp4 = v4,
+        vtmp5 = v5, tmpC1 = v16, tmpC2 = v17, tmpC3 = v18, tmpC4 = v19;
+    Register tmp1 = r0, tmp2 = r1, tmp3 = r2, tmp4 = r3, tmp5 = r4;
+    const long INF_OR_NAN_PREFIX = 0x7FF0;
+    const long MINF_OR_MNAN_PREFIX = 0xFFF0;
+    const long ONE_PREFIX = 0x3FF0;
+      __ movz(tmp2, ONE_PREFIX, 48);
+      __ movz(tmp4, 0x0010, 48);
+      __ fmovd(rscratch1, v0); // rscratch1 = bits of x
+      __ lea(rscratch2, ExternalAddress((address)StubRoutines::aarch64::_log_tbl));
+      __ movz(tmp5, 0x7F);
+      __ add(tmp1, rscratch1, tmp4);
+      __ cmp(tmp2, rscratch1);
+      __ lsr(tmp3, rscratch1, 29);
+      __ ccmp(tmp1, tmp4, 0b1101 /* LE */, __ NE);
+      __ bfm(tmp3, tmp5, 41, 8);
+      __ fmovs(vtmp5, tmp3);
+      __ ld1(tmpC1, tmpC2, tmpC3, tmpC4, __ T2D, __ post(rscratch2, 64)); // load p(r) coefficients
+      __ br(__ LE, CHECK_CORNER_CASES); // x == 1.0, x <= +/-0, or Inf/NaN
+    __ BIND(CHECKED_CORNER_CASES);
+      // all corner cases are handled
+      __ frecpe(vtmp5, vtmp5, S); // reciprocal estimate: vtmp5 ~= 1/mx
+      __ lsr(tmp2, rscratch1, 48);
+      __ movz(tmp4, 0x77f0, 48);
+      __ fmovd(vtmp4, 1.0);
+      __ movz(tmp1, INF_OR_NAN_PREFIX, 48);
+      __ bfm(tmp4, rscratch1, 0, 51);
+      __ fmovd(vtmp1, tmp4);
+      __ subw(tmp2, tmp2, 16);
+      __ cmp(tmp2, 0x8000);
+      __ br(__ GE, SPECIAL_CASE); // denormal input
+    __ BIND(MAIN);
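+      // Main path: the top bits of the reciprocal estimate select the table
+      // entry holding -log(B); r = B*mx - 1.0 is formed in high and low
+      // parts, and the pieces of k*log(2) - log(B) + p(r) are combined.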
+      __ fmovs(tmp3, vtmp5);
+      __ mov(tmp5, 0x3FE0);
+      __ mov(rscratch1, 0xffffe00000000000);
+      __ andr(tmp2, tmp2, tmp1, __ LSR, 48);
+      __ sub(tmp2, tmp2, tmp5);
+      __ scvtfwd(vtmp5, tmp2);
+      __ addw(tmp3, tmp3, 0x8000);
+      __ andr(tmp4, tmp4, rscratch1);
+      __ andr(rscratch1, rscratch1, tmp3, __ LSL, 29);
+      __ ubfm(tmp3, tmp3, 16, 23);
+      __ ldrq(vtmp2, Address(rscratch2, tmp3, Address::lsl(4)));
+      __ fmovd(vtmp3, tmp4);
+      __ fmovd(vtmp0, rscratch1);
+      __ fsubd(vtmp1, vtmp1, vtmp3);
+      __ fnmsub(vtmp3, vtmp3, vtmp0, vtmp4);
+      __ fmlavsd(vtmp2, tmpC4, vtmp5, 0);
+      __ fmaddd(vtmp1, vtmp1, vtmp0, vtmp3);
+      __ ins(vtmp5, __ D, vtmp2, 0, 1);
+      __ faddd(vtmp0, vtmp2, vtmp1);
+      __ fmlavsd(tmpC3, tmpC2, vtmp1, 0);
+      __ fsubd(vtmp2, vtmp2, vtmp0);
+      __ fmuld(vtmp3, vtmp1, vtmp1);
+      __ faddd(tmpC4, vtmp1, vtmp2);
+      __ fmlavsd(tmpC3, tmpC1, vtmp3, 0);
+      __ faddd(tmpC4, tmpC4, vtmp5);
+      __ fmuld(vtmp4, vtmp3, vtmp1);
+      __ faddd(vtmp0, vtmp0, tmpC4);
+      __ fmlavsd(tmpC3, vtmp4, tmpC3, 1);
+      __ fmaddd(vtmp0, tmpC3, vtmp3, vtmp0);
+      __ ret(lr);
+    __ BIND(SPECIAL_CASE);
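+      // Denormal input: multiply by 2^128 (0x47F0... interpreted as a
+      // double) to get a normal value, rebuild the significand and the
+      // reciprocal estimate, then continue at MAIN.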
+      __ movz(tmp2, 0x47F0, 48);
+      __ fmovd(vtmp1, tmp2);
+      __ fmuld(vtmp0, vtmp1, vtmp0);
+      __ fmovd(vtmp1, vtmp0);
+      __ umov(tmp2, vtmp1, __ S, 3);
+      __ orr(vtmp0, __ T16B, vtmp0, vtmp4);
+      __ ushr(vtmp5, __ T2D, vtmp0, 27);
+      __ ushr(vtmp5, __ T4S, vtmp5, 2);
+      __ frecpe(vtmp5, vtmp5, S);
+      __ shl(vtmp1, __ T2D, vtmp1, 12);
+      __ ushr(vtmp1, __ T2D, vtmp1, 12);
+      __ b(MAIN);
+    __ BIND(RETURN_MINF_OR_NAN);
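+      // special cases 3 and 5: return -INF or NaN built by ORing the
+      // prefix into the input bits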
+      __ movz(tmp1, MINF_OR_MNAN_PREFIX, 48);
+      __ orr(rscratch1, rscratch1, tmp1);
+      __ fmovd(v0, rscratch1);
+      __ ret(lr);
+    __ BIND(CHECK_CORNER_CASES);
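+      // dispatch the special cases; values that are not actually special
+      // resume at CHECKED_CORNER_CASES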
+      __ movz(tmp1, INF_OR_NAN_PREFIX, 48);
+      __ cmp(rscratch1, zr);
+      __ br(__ LE, RETURN_MINF_OR_NAN);
+      __ cmp(rscratch1, tmp1);
+      __ br(__ GE, DONE); // special cases 1 and 2
+      __ cmp(rscratch1, tmp2);
+      __ br(__ NE, CHECKED_CORNER_CASES);
+      // special case 4
+      __ fmovd(v0, 0.0);
+    __ BIND(DONE);
+      __ ret(lr);
+    return entry;
+  }
 
   /**
    *  Arguments:
    *
    *  Input:

@@ -5074,10 +5196,12 @@
     // array equals stub for large arrays.
     if (!UseSimpleArrayEquals) {
       StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
     }
 
+    StubRoutines::_dlog = generate_dlog();
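+    // (the dlog stub backs the Math.log intrinsic; note it is generated
+    // unconditionally, unlike the flag-guarded stubs around it)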
+
     if (UseMultiplyToLenIntrinsic) {
       StubRoutines::_multiplyToLen = generate_multiplyToLen();
     }
 
     if (UseSquareToLenIntrinsic) {