jdk-jdk Sdiff src/hotspot/cpu/aarch64

src/hotspot/cpu/aarch64/macroAssembler_aarch64_log.cpp

 275     bfm(tmp3, tmp5, 41, 8);
 276     fmovs(vtmp5, tmp3);
 277     // Load coefficients from the table. The coefficients are stored in a
 278     // specific order so that the load below places them directly into the
 279     // vectors used later by vector instructions. The load is performed in
 280     // parallel while branches are taken. C1 will contain the vector
 281     // {C1_0, C1_1}, C2 = {C2_0, C2_1}, C3 = {C3_0, C3_1}, C4 = {C4_0, C4_1}
 282     ld1(C1, C2, C3, C4, T2D, post(rscratch2, 64)); // post-index: rscratch2 += 64 after the load
 283     br(LE, CHECK_CORNER_CASES);
 284   bind(CHECKED_CORNER_CASES);
 285     // all corner cases are handled
 286     frecpe(vtmp5, vtmp5, S);                   // vtmp5 ~= 1/vtmp5
 287     lsr(tmp2, rscratch1, 48);
 288     movz(tmp4, 0x77f0, 48);
 289     fmovd(vtmp4, 1.0d);
 290     movz(tmp1, INF_OR_NAN_PREFIX, 48);
 291     bfm(tmp4, rscratch1, 0, 51);               // tmp4 = 0x77F0 << 48 | mantissa(X)
 292     // vtmp1 = AS_DOUBLE_BITS(0x77F0 << 48 | mantissa(X)) == mx
 293     fmovd(vtmp1, tmp4);
 294     subw(tmp2, tmp2, 16);
 295     cmp(tmp2, 0x8000);
 296     br(GE, SMALL_VALUE);
 297   bind(MAIN);
 298     fmovs(tmp3, vtmp5);                        // int intB0 = AS_INT_BITS(B);
 299     mov(tmp5, 0x3FE0);
 300     mov(rscratch1, 0xffffe00000000000);
 301     andr(tmp2, tmp2, tmp1, LSR, 48);           // hiWord & 0x7FF0
 302     sub(tmp2, tmp2, tmp5);                     // tmp2 = (hiWord & 0x7FF0) - 0x3FE0
 303     scvtfwd(vtmp5, tmp2);                      // vtmp5 = (double)tmp2;
 304     addw(tmp3, tmp3, 0x8000);                  // tmp3 = intB0 + 0x8000 (B)
 305     andr(tmp4, tmp4, rscratch1);               // tmp4 == hi_part(mx)
 306     andr(rscratch1, rscratch1, tmp3, LSL, 29); // rscratch1 = hi_part(B)
 307     ubfm(tmp3, tmp3, 16, 23);                  // int index = (tmp3 >> 16) & 0xFF (bits 16..23)
 308     ldrq(vtmp2, Address(rscratch2, tmp3, Address::lsl(4))); // vtmp2 = _L_tbl[index]
 309     // AS_LONG_BITS(vtmp1) & 0xffffe00000000000 // hi_part(mx)
 310     fmovd(vtmp3, tmp4);
 311     fmovd(vtmp0, rscratch1);                   // vtmp0 = hi_part(B)
 312     fsubd(vtmp1, vtmp1, vtmp3);                // vtmp1 -= vtmp3; // low_part(mx)
 313     fnmsub(vtmp3, vtmp3, vtmp0, vtmp4);        // vtmp3 = vtmp3*vtmp0 - vtmp4
 314     fmlavs(vtmp2, T2D, C4, vtmp5, 0);          // vtmp2 += {C4} * vtmp5
 315     // vtmp1 = r = vtmp1 * vtmp0 + vtmp3 == low_part(mx) * hi_part(B) + (hi_part(mx)*hi_part(B) - 1.0)

 275     bfm(tmp3, tmp5, 41, 8);
 276     fmovs(vtmp5, tmp3);
 277     // Load coefficients from the table. The coefficients are stored in a
 278     // specific order so that the load below places them directly into the
 279     // vectors used later by vector instructions. The load is performed in
 280     // parallel while branches are taken. C1 will contain the vector
 281     // {C1_0, C1_1}, C2 = {C2_0, C2_1}, C3 = {C3_0, C3_1}, C4 = {C4_0, C4_1}
 282     ld1(C1, C2, C3, C4, T2D, post(rscratch2, 64)); // post-index: rscratch2 += 64 after the load
 283     br(LE, CHECK_CORNER_CASES);
 284   bind(CHECKED_CORNER_CASES);
 285     // all corner cases are handled
 286     frecpe(vtmp5, vtmp5, S);                   // vtmp5 ~= 1/vtmp5
 287     lsr(tmp2, rscratch1, 48);
 288     movz(tmp4, 0x77f0, 48);
 289     fmovd(vtmp4, 1.0d);
 290     movz(tmp1, INF_OR_NAN_PREFIX, 48);
 291     bfm(tmp4, rscratch1, 0, 51);               // tmp4 = 0x77F0 << 48 | mantissa(X)
 292     // vtmp1 = AS_DOUBLE_BITS(0x77F0 << 48 | mantissa(X)) == mx
 293     fmovd(vtmp1, tmp4);
 294     subw(tmp2, tmp2, 16);
 295     subs(zr, tmp2, 0x8000);                    // compare: sets flags for tmp2 - 0x8000, result discarded
 296     br(GE, SMALL_VALUE);
 297   bind(MAIN);
 298     fmovs(tmp3, vtmp5);                        // int intB0 = AS_INT_BITS(B);
 299     mov(tmp5, 0x3FE0);
 300     mov(rscratch1, 0xffffe00000000000);
 301     andr(tmp2, tmp2, tmp1, LSR, 48);           // hiWord & 0x7FF0
 302     sub(tmp2, tmp2, tmp5);                     // tmp2 = (hiWord & 0x7FF0) - 0x3FE0
 303     scvtfwd(vtmp5, tmp2);                      // vtmp5 = (double)tmp2;
 304     addw(tmp3, tmp3, 0x8000);                  // tmp3 = intB0 + 0x8000 (B)
 305     andr(tmp4, tmp4, rscratch1);               // tmp4 == hi_part(mx)
 306     andr(rscratch1, rscratch1, tmp3, LSL, 29); // rscratch1 = hi_part(B)
 307     ubfm(tmp3, tmp3, 16, 23);                  // int index = (tmp3 >> 16) & 0xFF (bits 16..23)
 308     ldrq(vtmp2, Address(rscratch2, tmp3, Address::lsl(4))); // vtmp2 = _L_tbl[index]
 309     // AS_LONG_BITS(vtmp1) & 0xffffe00000000000 // hi_part(mx)
 310     fmovd(vtmp3, tmp4);
 311     fmovd(vtmp0, rscratch1);                   // vtmp0 = hi_part(B)
 312     fsubd(vtmp1, vtmp1, vtmp3);                // vtmp1 -= vtmp3; // low_part(mx)
 313     fnmsub(vtmp3, vtmp3, vtmp0, vtmp4);        // vtmp3 = vtmp3*vtmp0 - vtmp4
 314     fmlavs(vtmp2, T2D, C4, vtmp5, 0);          // vtmp2 += {C4} * vtmp5
 315     // vtmp1 = r = vtmp1 * vtmp0 + vtmp3 == low_part(mx) * hi_part(B) + (hi_part(mx)*hi_part(B) - 1.0)

< prev index next >