48 // B = int((B0*2^7+0.5))/2^7
49 //
50 // Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
51 //
52 // Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6), where
53 // p(r) is a degree 7 polynomial and
54 // -log(B) is read from the data table (high, low parts).
55 // The result is formed from high and low parts.
56 //
57 // Special cases:
58 // 1. log(NaN) = quiet NaN
59 // 2. log(+INF) = that INF
60 // 3. log(0) = -INF
61 // 4. log(1) = +0
62 // 5. log(x) = NaN if x < -0, including -INF
63 //
64 /******************************************************************************/
65
66 // Table with p(r) polynomial coefficients
67 // and table representation of logarithm values (hi and low parts)
68 __attribute__ ((aligned(64))) juint _L_tbl[] =
69 {
70 // coefficients of p(r) polynomial:
71 // _coeff[]
72 0x00000000UL, 0xbfd00000UL, // C1_0 = -0.25
73 0x92492492UL, 0x3fc24924UL, // C1_1 = 0.14285714285714285
74 0x55555555UL, 0x3fd55555UL, // C2_0 = 0.3333333333333333
75 0x3d6fb175UL, 0xbfc5555eUL, // C2_1 = -0.16666772842235003
76 0x00000000UL, 0xbfe00000UL, // C3_0 = -0.5
77 0x9999999aUL, 0x3fc99999UL, // C3_1 = 0.2
78 // _log2[]
79 0xfefa3800UL, 0x3fa62e42UL, // C4_0 = 0.043321698784993146
80 0x93c76730UL, 0x3ceef357UL, // C4_1 = 3.436201886692732e-15
81 // _L_tbl[] with logarithm values (hi and low parts)
82 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
83 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
84 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
85 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
86 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
87 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
88 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
243 // double p7 = C3_0*r2 + C2_0*r3 + C1_0*r2*r2 + C3_1*r3*r2 + C2_1*r3*r3
244 // + C1_1*r3*r2*r2; // degree 7 polynomial
245 // return p7 + (vtmp0 + ((r + hiTableValue) + lowTableValue));
246 // }
247 //
248 // END dlog PSEUDO CODE
249
250
251 // Generate log(X). X is passed in register v0; log(X) is returned in v0.
252 // Generator parameters: 10 temporary FPU registers and 5 temporary general
253 // purpose registers
254 void MacroAssembler::fast_log(FloatRegister vtmp0, FloatRegister vtmp1,
255 FloatRegister vtmp2, FloatRegister vtmp3,
256 FloatRegister vtmp4, FloatRegister vtmp5,
257 FloatRegister C1, FloatRegister C2,
258 FloatRegister C3, FloatRegister C4,
259 Register tmp1, Register tmp2, Register tmp3,
260 Register tmp4, Register tmp5) {
261 Label DONE, CHECK_CORNER_CASES, SMALL_VALUE, MAIN,
262 CHECKED_CORNER_CASES, RETURN_MINF_OR_NAN;
263 const long INF_OR_NAN_PREFIX = 0x7FF0;
264 const long MINF_OR_MNAN_PREFIX = 0xFFF0;
265 const long ONE_PREFIX = 0x3FF0;
266 movz(tmp2, ONE_PREFIX, 48);
267 movz(tmp4, 0x0010, 48);
268 fmovd(rscratch1, v0); // rscratch1 = AS_LONG_BITS(X)
269 lea(rscratch2, ExternalAddress((address)_L_tbl));
270 movz(tmp5, 0x7F);
271 add(tmp1, rscratch1, tmp4);
272 cmp(tmp2, rscratch1);
273 lsr(tmp3, rscratch1, 29);
274 ccmp(tmp1, tmp4, 0b1101 /* LE */, NE);
275 bfm(tmp3, tmp5, 41, 8);
276 fmovs(vtmp5, tmp3);
277 // Load coefficients from the table. The coefficients are arranged in a
278 // specific order because the load below places them into vectors that are
279 // used later by vector instructions. The load proceeds in parallel while
280 // branches are taken. C1 will contain the vector {C1_0, C1_1}, C2 =
281 // {C2_0, C2_1}, C3 = {C3_0, C3_1}, C4 = {C4_0, C4_1}
282 ld1(C1, C2, C3, C4, T2D, post(rscratch2, 64));
283 br(LE, CHECK_CORNER_CASES);
284 bind(CHECKED_CORNER_CASES);
285 // all corner cases are handled
|
48 // B = int((B0*2^7+0.5))/2^7
49 //
50 // Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
51 //
52 // Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6), where
53 // p(r) is a degree 7 polynomial and
54 // -log(B) is read from the data table (high, low parts).
55 // The result is formed from high and low parts.
56 //
57 // Special cases:
58 // 1. log(NaN) = quiet NaN
59 // 2. log(+INF) = that INF
60 // 3. log(0) = -INF
61 // 4. log(1) = +0
62 // 5. log(x) = NaN if x < -0, including -INF
63 //
64 /******************************************************************************/
65
66 // Table with p(r) polynomial coefficients
67 // and table representation of logarithm values (hi and low parts)
68 ATTRIBUTE_ALIGNED(64) juint _L_tbl[] =
69 {
70 // coefficients of p(r) polynomial:
71 // _coeff[]
72 0x00000000UL, 0xbfd00000UL, // C1_0 = -0.25
73 0x92492492UL, 0x3fc24924UL, // C1_1 = 0.14285714285714285
74 0x55555555UL, 0x3fd55555UL, // C2_0 = 0.3333333333333333
75 0x3d6fb175UL, 0xbfc5555eUL, // C2_1 = -0.16666772842235003
76 0x00000000UL, 0xbfe00000UL, // C3_0 = -0.5
77 0x9999999aUL, 0x3fc99999UL, // C3_1 = 0.2
78 // _log2[]
79 0xfefa3800UL, 0x3fa62e42UL, // C4_0 = 0.043321698784993146
80 0x93c76730UL, 0x3ceef357UL, // C4_1 = 3.436201886692732e-15
81 // _L_tbl[] with logarithm values (hi and low parts)
82 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
83 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
84 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
85 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
86 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
87 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
88 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
243 // double p7 = C3_0*r2 + C2_0*r3 + C1_0*r2*r2 + C3_1*r3*r2 + C2_1*r3*r3
244 // + C1_1*r3*r2*r2; // degree 7 polynomial
245 // return p7 + (vtmp0 + ((r + hiTableValue) + lowTableValue));
246 // }
247 //
248 // END dlog PSEUDO CODE
249
250
251 // Generate log(X). X is passed in register v0; log(X) is returned in v0.
252 // Generator parameters: 10 temporary FPU registers and 5 temporary general
253 // purpose registers
254 void MacroAssembler::fast_log(FloatRegister vtmp0, FloatRegister vtmp1,
255 FloatRegister vtmp2, FloatRegister vtmp3,
256 FloatRegister vtmp4, FloatRegister vtmp5,
257 FloatRegister C1, FloatRegister C2,
258 FloatRegister C3, FloatRegister C4,
259 Register tmp1, Register tmp2, Register tmp3,
260 Register tmp4, Register tmp5) {
261 Label DONE, CHECK_CORNER_CASES, SMALL_VALUE, MAIN,
262 CHECKED_CORNER_CASES, RETURN_MINF_OR_NAN;
263 const int64_t INF_OR_NAN_PREFIX = 0x7FF0;
264 const int64_t MINF_OR_MNAN_PREFIX = 0xFFF0;
265 const int64_t ONE_PREFIX = 0x3FF0;
266 movz(tmp2, ONE_PREFIX, 48);
267 movz(tmp4, 0x0010, 48);
268 fmovd(rscratch1, v0); // rscratch1 = AS_LONG_BITS(X)
269 lea(rscratch2, ExternalAddress((address)_L_tbl));
270 movz(tmp5, 0x7F);
271 add(tmp1, rscratch1, tmp4);
272 cmp(tmp2, rscratch1);
273 lsr(tmp3, rscratch1, 29);
274 ccmp(tmp1, tmp4, 0b1101 /* LE */, NE);
275 bfm(tmp3, tmp5, 41, 8);
276 fmovs(vtmp5, tmp3);
277 // Load coefficients from the table. The coefficients are arranged in a
278 // specific order because the load below places them into vectors that are
279 // used later by vector instructions. The load proceeds in parallel while
280 // branches are taken. C1 will contain the vector {C1_0, C1_1}, C2 =
281 // {C2_0, C2_1}, C3 = {C3_0, C3_1}, C4 = {C4_0, C4_1}
282 ld1(C1, C2, C3, C4, T2D, post(rscratch2, 64));
283 br(LE, CHECK_CORNER_CASES);
284 bind(CHECKED_CORNER_CASES);
285 // all corner cases are handled
|