< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64_log.cpp

Print this page
8248238: Adding Windows support to OpenJDK on AArch64

Summary: LP64 vs LLP64 changes to add Windows support

Contributed-by: Monica Beckwith <monica.beckwith@microsoft.com>, Ludovic Henry <luhenry@microsoft.com>
Reviewed-by:


 243 //    double p7 = C3_0*r2 + C2_0*r3 + C1_0*r2*r2 + C3_1*r3*r2 + C2_1*r3*r3
 244 //              + C1_1*r3*r2*r2; // degree 7 polynomial
 245 //    return p7 + (vtmp0 + ((r + hiTableValue) + lowTableValue));
 246 //  }
 247 //
 248 // END dlog PSEUDO CODE
 249 
 250 
 251 // Generate log(X). X passed in register v0. Return log(X) into v0.
 252 // Generator parameters: 10 temporary FPU registers and 5 temporary general
 253 // purpose registers
 254 void MacroAssembler::fast_log(FloatRegister vtmp0, FloatRegister vtmp1,
 255                               FloatRegister vtmp2, FloatRegister vtmp3,
 256                               FloatRegister vtmp4, FloatRegister vtmp5,
 257                               FloatRegister C1, FloatRegister C2,
 258                               FloatRegister C3, FloatRegister C4,
 259                               Register tmp1, Register tmp2, Register tmp3,
 260                               Register tmp4, Register tmp5) {
 261   Label DONE, CHECK_CORNER_CASES, SMALL_VALUE, MAIN,
 262       CHECKED_CORNER_CASES, RETURN_MINF_OR_NAN;
 263   const long INF_OR_NAN_PREFIX = 0x7FF0;
 264   const long MINF_OR_MNAN_PREFIX = 0xFFF0;
 265   const long ONE_PREFIX = 0x3FF0;
 266     movz(tmp2, ONE_PREFIX, 48);
 267     movz(tmp4, 0x0010, 48);
 268     fmovd(rscratch1, v0); // rscratch1 = AS_LONG_BITS(X)
 269     lea(rscratch2, ExternalAddress((address)_L_tbl));
 270     movz(tmp5, 0x7F);
 271     add(tmp1, rscratch1, tmp4);
 272     cmp(tmp2, rscratch1);
 273     lsr(tmp3, rscratch1, 29);
 274     ccmp(tmp1, tmp4, 0b1101 /* LE */, NE);
 275     bfm(tmp3, tmp5, 41, 8);
 276     fmovs(vtmp5, tmp3);
 277     // Load coefficients from table. All coefficients are organized to be
 278     // in specific order, because load below will load it in vectors to be used
 279     // later in vector instructions. Load will be performed in parallel while
 280     // branches are taken. C1 will contain vector of {C1_0, C1_1}, C2 =
 281     // {C2_0, C2_1}, C3 = {C3_0, C3_1}, C4 = {C4_0, C4_1}
 282     ld1(C1, C2, C3, C4, T2D, post(rscratch2, 64));
 283     br(LE, CHECK_CORNER_CASES);
 284   bind(CHECKED_CORNER_CASES);
 285     // all corner cases are handled




 243 //    double p7 = C3_0*r2 + C2_0*r3 + C1_0*r2*r2 + C3_1*r3*r2 + C2_1*r3*r3
 244 //              + C1_1*r3*r2*r2; // degree 7 polynomial
 245 //    return p7 + (vtmp0 + ((r + hiTableValue) + lowTableValue));
 246 //  }
 247 //
 248 // END dlog PSEUDO CODE
 249 
 250 
 251 // Generate log(X). X passed in register v0. Return log(X) into v0.
 252 // Generator parameters: 10 temporary FPU registers and 5 temporary general
 253 // purpose registers
 254 void MacroAssembler::fast_log(FloatRegister vtmp0, FloatRegister vtmp1,
 255                               FloatRegister vtmp2, FloatRegister vtmp3,
 256                               FloatRegister vtmp4, FloatRegister vtmp5,
 257                               FloatRegister C1, FloatRegister C2,
 258                               FloatRegister C3, FloatRegister C4,
 259                               Register tmp1, Register tmp2, Register tmp3,
 260                               Register tmp4, Register tmp5) {
 261   Label DONE, CHECK_CORNER_CASES, SMALL_VALUE, MAIN,
 262       CHECKED_CORNER_CASES, RETURN_MINF_OR_NAN;
 263   const int64_t INF_OR_NAN_PREFIX = 0x7FF0;
 264   const int64_t MINF_OR_MNAN_PREFIX = 0xFFF0;
 265   const int64_t ONE_PREFIX = 0x3FF0;
 266     movz(tmp2, ONE_PREFIX, 48);
 267     movz(tmp4, 0x0010, 48);
 268     fmovd(rscratch1, v0); // rscratch1 = AS_LONG_BITS(X)
 269     lea(rscratch2, ExternalAddress((address)_L_tbl));
 270     movz(tmp5, 0x7F);
 271     add(tmp1, rscratch1, tmp4);
 272     cmp(tmp2, rscratch1);
 273     lsr(tmp3, rscratch1, 29);
 274     ccmp(tmp1, tmp4, 0b1101 /* LE */, NE);
 275     bfm(tmp3, tmp5, 41, 8);
 276     fmovs(vtmp5, tmp3);
 277     // Load coefficients from table. All coefficients are organized to be
 278     // in specific order, because load below will load it in vectors to be used
 279     // later in vector instructions. Load will be performed in parallel while
 280     // branches are taken. C1 will contain vector of {C1_0, C1_1}, C2 =
 281     // {C2_0, C2_1}, C3 = {C3_0, C3_1}, C4 = {C4_0, C4_1}
 282     ld1(C1, C2, C3, C4, T2D, post(rscratch2, 64));
 283     br(LE, CHECK_CORNER_CASES);
 284   bind(CHECKED_CORNER_CASES);
 285     // all corner cases are handled


< prev index next >