1 /* 2 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "jni.h" 27 #include "runtime/interfaceSupport.inline.hpp" 28 #include "runtime/sharedRuntime.hpp" 29 #include "runtime/sharedRuntimeMath.hpp" 30 31 // This file contains copies of the fdlibm routines used by 32 // StrictMath. It turns out that it is almost always required to use 33 // these runtime routines; the Intel CPU doesn't meet the Java 34 // specification for sin/cos outside a certain limited argument range, 35 // and the SPARC CPU doesn't appear to have sin/cos instructions. It 36 // also turns out that avoiding the indirect call through function 37 // pointer out to libjava.so in SharedRuntime speeds these routines up 38 // by roughly 15% on both Win32/x86 and Solaris/SPARC. 39 40 /* 41 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) 42 * double x[],y[]; int e0,nx,prec; int ipio2[]; 43 * 44 * __kernel_rem_pio2 return the last three digits of N with 45 * y = x - N*pi/2 46 * so that |y| < pi/2. 47 * 48 * The method is to compute the integer (mod 8) and fraction parts of 49 * (2/pi)*x without doing the full multiplication. In general we 50 * skip the part of the product that are known to be a huge integer ( 51 * more accurately, = 0 mod 8 ). Thus the number of operations are 52 * independent of the exponent of the input. 53 * 54 * (2/pi) is represented by an array of 24-bit integers in ipio2[]. 55 * 56 * Input parameters: 57 * x[] The input value (must be positive) is broken into nx 58 * pieces of 24-bit integers in double precision format. 59 * x[i] will be the i-th 24 bit of x. The scaled exponent 60 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 61 * match x's up to 24 bits. 62 * 63 * Example of breaking a double positive z into x[0]+x[1]+x[2]: 64 * e0 = ilogb(z)-23 65 * z = scalbn(z,-e0) 66 * for i = 0,1,2 67 * x[i] = floor(z) 68 * z = (z-x[i])*2**24 69 * 70 * 71 * y[] ouput result in an array of double precision numbers. 72 * The dimension of y[] is: 73 * 24-bit precision 1 74 * 53-bit precision 2 75 * 64-bit precision 2 76 * 113-bit precision 3 77 * The actual value is the sum of them. Thus for 113-bit 78 * precsion, one may have to do something like: 79 * 80 * long double t,w,r_head, r_tail; 81 * t = (long double)y[2] + (long double)y[1]; 82 * w = (long double)y[0]; 83 * r_head = t+w; 84 * r_tail = w - (r_head - t); 85 * 86 * e0 The exponent of x[0] 87 * 88 * nx dimension of x[] 89 * 90 * prec an interger indicating the precision: 91 * 0 24 bits (single) 92 * 1 53 bits (double) 93 * 2 64 bits (extended) 94 * 3 113 bits (quad) 95 * 96 * ipio2[] 97 * integer array, contains the (24*i)-th to (24*i+23)-th 98 * bit of 2/pi after binary point. The corresponding 99 * floating value is 100 * 101 * ipio2[i] * 2^(-24(i+1)). 102 * 103 * External function: 104 * double scalbn(), floor(); 105 * 106 * 107 * Here is the description of some local variables: 108 * 109 * jk jk+1 is the initial number of terms of ipio2[] needed 110 * in the computation. The recommended value is 2,3,4, 111 * 6 for single, double, extended,and quad. 112 * 113 * jz local integer variable indicating the number of 114 * terms of ipio2[] used. 115 * 116 * jx nx - 1 117 * 118 * jv index for pointing to the suitable ipio2[] for the 119 * computation. In general, we want 120 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 121 * is an integer. Thus 122 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv 123 * Hence jv = max(0,(e0-3)/24). 124 * 125 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. 126 * 127 * q[] double array with integral value, representing the 128 * 24-bits chunk of the product of x and 2/pi. 129 * 130 * q0 the corresponding exponent of q[0]. Note that the 131 * exponent for q[i] would be q0-24*i. 132 * 133 * PIo2[] double precision array, obtained by cutting pi/2 134 * into 24 bits chunks. 135 * 136 * f[] ipio2[] in floating point 137 * 138 * iq[] integer array by breaking up q[] in 24-bits chunk. 139 * 140 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] 141 * 142 * ih integer. If >0 it indicates q[] is >= 0.5, hence 143 * it also indicates the *sign* of the result. 144 * 145 */ 146 147 148 /* 149 * Constants: 150 * The hexadecimal values are the intended ones for the following 151 * constants. The decimal values may be used, provided that the 152 * compiler will convert from decimal to binary accurately enough 153 * to produce the hexadecimal values shown. 154 */ 155 156 157 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */ 158 159 static const double PIo2[] = { 160 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ 161 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ 162 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ 163 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ 164 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ 165 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ 166 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ 167 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ 168 }; 169 170 static const double 171 zeroB = 0.0, 172 one = 1.0, 173 two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ 174 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ 175 176 static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) { 177 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; 178 double z,fw,f[20],fq[20],q[20]; 179 180 /* initialize jk*/ 181 jk = init_jk[prec]; 182 jp = jk; 183 184 /* determine jx,jv,q0, note that 3>q0 */ 185 jx = nx-1; 186 jv = (e0-3)/24; if(jv<0) jv=0; 187 q0 = e0-24*(jv+1); 188 189 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ 190 j = jv-jx; m = jx+jk; 191 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j]; 192 193 /* compute q[0],q[1],...q[jk] */ 194 for (i=0;i<=jk;i++) { 195 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; 196 } 197 198 jz = jk; 199 recompute: 200 /* distill q[] into iq[] reversingly */ 201 for(i=0,j=jz,z=q[jz];j>0;i++,j--) { 202 fw = (double)((int)(twon24* z)); 203 iq[i] = (int)(z-two24B*fw); 204 z = q[j-1]+fw; 205 } 206 207 /* compute n */ 208 z = scalbnA(z,q0); /* actual value of z */ 209 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ 210 n = (int) z; 211 z -= (double)n; 212 ih = 0; 213 if(q0>0) { /* need iq[jz-1] to determine n */ 214 i = (iq[jz-1]>>(24-q0)); n += i; 215 iq[jz-1] -= i<<(24-q0); 216 ih = iq[jz-1]>>(23-q0); 217 } 218 else if(q0==0) ih = iq[jz-1]>>23; 219 else if(z>=0.5) ih=2; 220 221 if(ih>0) { /* q > 0.5 */ 222 n += 1; carry = 0; 223 for(i=0;i<jz ;i++) { /* compute 1-q */ 224 j = iq[i]; 225 if(carry==0) { 226 if(j!=0) { 227 carry = 1; iq[i] = 0x1000000- j; 228 } 229 } else iq[i] = 0xffffff - j; 230 } 231 if(q0>0) { /* rare case: chance is 1 in 12 */ 232 switch(q0) { 233 case 1: 234 iq[jz-1] &= 0x7fffff; break; 235 case 2: 236 iq[jz-1] &= 0x3fffff; break; 237 } 238 } 239 if(ih==2) { 240 z = one - z; 241 if(carry!=0) z -= scalbnA(one,q0); 242 } 243 } 244 245 /* check if recomputation is needed */ 246 if(z==zeroB) { 247 j = 0; 248 for (i=jz-1;i>=jk;i--) j |= iq[i]; 249 if(j==0) { /* need recomputation */ 250 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ 251 252 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ 253 f[jx+i] = (double) ipio2[jv+i]; 254 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; 255 q[i] = fw; 256 } 257 jz += k; 258 goto recompute; 259 } 260 } 261 262 /* chop off zero terms */ 263 if(z==0.0) { 264 jz -= 1; q0 -= 24; 265 while(iq[jz]==0) { jz--; q0-=24;} 266 } else { /* break z into 24-bit if necessary */ 267 z = scalbnA(z,-q0); 268 if(z>=two24B) { 269 fw = (double)((int)(twon24*z)); 270 iq[jz] = (int)(z-two24B*fw); 271 jz += 1; q0 += 24; 272 iq[jz] = (int) fw; 273 } else iq[jz] = (int) z ; 274 } 275 276 /* convert integer "bit" chunk to floating-point value */ 277 fw = scalbnA(one,q0); 278 for(i=jz;i>=0;i--) { 279 q[i] = fw*(double)iq[i]; fw*=twon24; 280 } 281 282 /* compute PIo2[0,...,jp]*q[jz,...,0] */ 283 for(i=jz;i>=0;i--) { 284 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; 285 fq[jz-i] = fw; 286 } 287 288 /* compress fq[] into y[] */ 289 switch(prec) { 290 case 0: 291 fw = 0.0; 292 for (i=jz;i>=0;i--) fw += fq[i]; 293 y[0] = (ih==0)? fw: -fw; 294 break; 295 case 1: 296 case 2: 297 fw = 0.0; 298 for (i=jz;i>=0;i--) fw += fq[i]; 299 y[0] = (ih==0)? fw: -fw; 300 fw = fq[0]-fw; 301 for (i=1;i<=jz;i++) fw += fq[i]; 302 y[1] = (ih==0)? fw: -fw; 303 break; 304 case 3: /* painful */ 305 for (i=jz;i>0;i--) { 306 fw = fq[i-1]+fq[i]; 307 fq[i] += fq[i-1]-fw; 308 fq[i-1] = fw; 309 } 310 for (i=jz;i>1;i--) { 311 fw = fq[i-1]+fq[i]; 312 fq[i] += fq[i-1]-fw; 313 fq[i-1] = fw; 314 } 315 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; 316 if(ih==0) { 317 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; 318 } else { 319 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; 320 } 321 } 322 return n&7; 323 } 324 325 326 /* 327 * ==================================================== 328 * Copyright (c) 1993 Oracle and/or its affiliates. All rights reserved. 329 * 330 * Developed at SunPro, a Sun Microsystems, Inc. business. 331 * Permission to use, copy, modify, and distribute this 332 * software is freely granted, provided that this notice 333 * is preserved. 334 * ==================================================== 335 * 336 */ 337 338 /* __ieee754_rem_pio2(x,y) 339 * 340 * return the remainder of x rem pi/2 in y[0]+y[1] 341 * use __kernel_rem_pio2() 342 */ 343 344 /* 345 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi 346 */ 347 static const int two_over_pi[] = { 348 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 349 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 350 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 351 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 352 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, 353 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, 354 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 355 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 356 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 357 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 358 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, 359 }; 360 361 static const int npio2_hw[] = { 362 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, 363 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, 364 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, 365 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, 366 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, 367 0x404858EB, 0x404921FB, 368 }; 369 370 /* 371 * invpio2: 53 bits of 2/pi 372 * pio2_1: first 33 bit of pi/2 373 * pio2_1t: pi/2 - pio2_1 374 * pio2_2: second 33 bit of pi/2 375 * pio2_2t: pi/2 - (pio2_1+pio2_2) 376 * pio2_3: third 33 bit of pi/2 377 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) 378 */ 379 380 static const double 381 zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ 382 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ 383 two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ 384 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ 385 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ 386 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ 387 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ 388 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ 389 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ 390 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ 391 392 static int __ieee754_rem_pio2(double x, double *y) { 393 double z,w,t,r,fn; 394 double tx[3]; 395 int e0,i,j,nx,n,ix,hx,i0; 396 397 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ 398 hx = *(i0+(int*)&x); /* high word of x */ 399 ix = hx&0x7fffffff; 400 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */ 401 {y[0] = x; y[1] = 0; return 0;} 402 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ 403 if(hx>0) { 404 z = x - pio2_1; 405 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ 406 y[0] = z - pio2_1t; 407 y[1] = (z-y[0])-pio2_1t; 408 } else { /* near pi/2, use 33+33+53 bit pi */ 409 z -= pio2_2; 410 y[0] = z - pio2_2t; 411 y[1] = (z-y[0])-pio2_2t; 412 } 413 return 1; 414 } else { /* negative x */ 415 z = x + pio2_1; 416 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ 417 y[0] = z + pio2_1t; 418 y[1] = (z-y[0])+pio2_1t; 419 } else { /* near pi/2, use 33+33+53 bit pi */ 420 z += pio2_2; 421 y[0] = z + pio2_2t; 422 y[1] = (z-y[0])+pio2_2t; 423 } 424 return -1; 425 } 426 } 427 if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ 428 t = fabsd(x); 429 n = (int) (t*invpio2+half); 430 fn = (double)n; 431 r = t-fn*pio2_1; 432 w = fn*pio2_1t; /* 1st round good to 85 bit */ 433 if(n<32&&ix!=npio2_hw[n-1]) { 434 y[0] = r-w; /* quick check no cancellation */ 435 } else { 436 j = ix>>20; 437 y[0] = r-w; 438 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); 439 if(i>16) { /* 2nd iteration needed, good to 118 */ 440 t = r; 441 w = fn*pio2_2; 442 r = t-w; 443 w = fn*pio2_2t-((t-r)-w); 444 y[0] = r-w; 445 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); 446 if(i>49) { /* 3rd iteration need, 151 bits acc */ 447 t = r; /* will cover all possible cases */ 448 w = fn*pio2_3; 449 r = t-w; 450 w = fn*pio2_3t-((t-r)-w); 451 y[0] = r-w; 452 } 453 } 454 } 455 y[1] = (r-y[0])-w; 456 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} 457 else return n; 458 } 459 /* 460 * all other (large) arguments 461 */ 462 if(ix>=0x7ff00000) { /* x is inf or NaN */ 463 y[0]=y[1]=x-x; return 0; 464 } 465 /* set z = scalbn(|x|,ilogb(x)-23) */ 466 *(1-i0+(int*)&z) = *(1-i0+(int*)&x); 467 e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ 468 *(i0+(int*)&z) = ix - (e0<<20); 469 for(i=0;i<2;i++) { 470 tx[i] = (double)((int)(z)); 471 z = (z-tx[i])*two24A; 472 } 473 tx[2] = z; 474 nx = 3; 475 while(tx[nx-1]==zeroA) nx--; /* skip zero term */ 476 n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); 477 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} 478 return n; 479 } 480 481 482 /* __kernel_sin( x, y, iy) 483 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 484 * Input x is assumed to be bounded by ~pi/4 in magnitude. 485 * Input y is the tail of x. 486 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). 487 * 488 * Algorithm 489 * 1. Since sin(-x) = -sin(x), we need only to consider positive x. 490 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. 491 * 3. sin(x) is approximated by a polynomial of degree 13 on 492 * [0,pi/4] 493 * 3 13 494 * sin(x) ~ x + S1*x + ... + S6*x 495 * where 496 * 497 * |sin(x) 2 4 6 8 10 12 | -58 498 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 499 * | x | 500 * 501 * 4. sin(x+y) = sin(x) + sin'(x')*y 502 * ~ sin(x) + (1-x*x/2)*y 503 * For better accuracy, let 504 * 3 2 2 2 2 505 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) 506 * then 3 2 507 * sin(x) = x + (S1*x + (x *(r-y/2)+y)) 508 */ 509 510 static const double 511 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ 512 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ 513 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ 514 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ 515 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ 516 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ 517 518 static double __kernel_sin(double x, double y, int iy) 519 { 520 double z,r,v; 521 int ix; 522 ix = high(x)&0x7fffffff; /* high word of x */ 523 if(ix<0x3e400000) /* |x| < 2**-27 */ 524 {if((int)x==0) return x;} /* generate inexact */ 525 z = x*x; 526 v = z*x; 527 r = S2+z*(S3+z*(S4+z*(S5+z*S6))); 528 if(iy==0) return x+v*(S1+z*r); 529 else return x-((z*(half*y-v*r)-y)-v*S1); 530 } 531 532 /* 533 * __kernel_cos( x, y ) 534 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 535 * Input x is assumed to be bounded by ~pi/4 in magnitude. 536 * Input y is the tail of x. 537 * 538 * Algorithm 539 * 1. Since cos(-x) = cos(x), we need only to consider positive x. 540 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. 541 * 3. cos(x) is approximated by a polynomial of degree 14 on 542 * [0,pi/4] 543 * 4 14 544 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x 545 * where the remez error is 546 * 547 * | 2 4 6 8 10 12 14 | -58 548 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 549 * | | 550 * 551 * 4 6 8 10 12 14 552 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then 553 * cos(x) = 1 - x*x/2 + r 554 * since cos(x+y) ~ cos(x) - sin(x)*y 555 * ~ cos(x) - x*y, 556 * a correction term is necessary in cos(x) and hence 557 * cos(x+y) = 1 - (x*x/2 - (r - x*y)) 558 * For better accuracy when x > 0.3, let qx = |x|/4 with 559 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. 560 * Then 561 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). 562 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the 563 * magnitude of the latter is at least a quarter of x*x/2, 564 * thus, reducing the rounding error in the subtraction. 565 */ 566 567 static const double 568 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ 569 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ 570 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ 571 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ 572 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ 573 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ 574 575 static double __kernel_cos(double x, double y) 576 { 577 double a,h,z,r,qx=0; 578 int ix; 579 ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ 580 if(ix<0x3e400000) { /* if x < 2**27 */ 581 if(((int)x)==0) return one; /* generate inexact */ 582 } 583 z = x*x; 584 r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); 585 if(ix < 0x3FD33333) /* if |x| < 0.3 */ 586 return one - (0.5*z - (z*r - x*y)); 587 else { 588 if(ix > 0x3fe90000) { /* x > 0.78125 */ 589 qx = 0.28125; 590 } else { 591 set_high(&qx, ix-0x00200000); /* x/4 */ 592 set_low(&qx, 0); 593 } 594 h = 0.5*z-qx; 595 a = one-qx; 596 return a - (h - (z*r-x*y)); 597 } 598 } 599 600 /* __kernel_tan( x, y, k ) 601 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854 602 * Input x is assumed to be bounded by ~pi/4 in magnitude. 603 * Input y is the tail of x. 604 * Input k indicates whether tan (if k=1) or 605 * -1/tan (if k= -1) is returned. 606 * 607 * Algorithm 608 * 1. Since tan(-x) = -tan(x), we need only to consider positive x. 609 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0. 610 * 3. tan(x) is approximated by a odd polynomial of degree 27 on 611 * [0,0.67434] 612 * 3 27 613 * tan(x) ~ x + T1*x + ... + T13*x 614 * where 615 * 616 * |tan(x) 2 4 26 | -59.2 617 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 618 * | x | 619 * 620 * Note: tan(x+y) = tan(x) + tan'(x)*y 621 * ~ tan(x) + (1+x*x)*y 622 * Therefore, for better accuracy in computing tan(x+y), let 623 * 3 2 2 2 2 624 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) 625 * then 626 * 3 2 627 * tan(x+y) = x + (T1*x + (x *(r+y)+y)) 628 * 629 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then 630 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) 631 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) 632 */ 633 634 static const double 635 pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ 636 pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */ 637 T[] = { 638 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */ 639 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */ 640 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */ 641 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */ 642 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */ 643 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */ 644 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */ 645 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */ 646 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */ 647 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */ 648 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */ 649 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */ 650 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */ 651 }; 652 653 static double __kernel_tan(double x, double y, int iy) 654 { 655 double z,r,v,w,s; 656 int ix,hx; 657 hx = high(x); /* high word of x */ 658 ix = hx&0x7fffffff; /* high word of |x| */ 659 if(ix<0x3e300000) { /* x < 2**-28 */ 660 if((int)x==0) { /* generate inexact */ 661 if (((ix | low(x)) | (iy + 1)) == 0) 662 return one / fabsd(x); 663 else { 664 if (iy == 1) 665 return x; 666 else { /* compute -1 / (x+y) carefully */ 667 double a, t; 668 669 z = w = x + y; 670 set_low(&z, 0); 671 v = y - (z - x); 672 t = a = -one / w; 673 set_low(&t, 0); 674 s = one + t * z; 675 return t + a * (s + t * v); 676 } 677 } 678 } 679 } 680 if(ix>=0x3FE59428) { /* |x|>=0.6744 */ 681 if(hx<0) {x = -x; y = -y;} 682 z = pio4-x; 683 w = pio4lo-y; 684 x = z+w; y = 0.0; 685 } 686 z = x*x; 687 w = z*z; 688 /* Break x^5*(T[1]+x^2*T[2]+...) into 689 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + 690 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) 691 */ 692 r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11])))); 693 v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12]))))); 694 s = z*x; 695 r = y + z*(s*(r+v)+y); 696 r += T[0]*s; 697 w = x+r; 698 if(ix>=0x3FE59428) { 699 v = (double)iy; 700 return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r))); 701 } 702 if(iy==1) return w; 703 else { /* if allow error up to 2 ulp, 704 simply return -1.0/(x+r) here */ 705 /* compute -1.0/(x+r) accurately */ 706 double a,t; 707 z = w; 708 set_low(&z, 0); 709 v = r-(z - x); /* z+v = r+x */ 710 t = a = -1.0/w; /* a = -1.0/w */ 711 set_low(&t, 0); 712 s = 1.0+t*z; 713 return t+a*(s+t*v); 714 } 715 } 716 717 718 //---------------------------------------------------------------------- 719 // 720 // Routines for new sin/cos implementation 721 // 722 //---------------------------------------------------------------------- 723 724 /* sin(x) 725 * Return sine function of x. 726 * 727 * kernel function: 728 * __kernel_sin ... sine function on [-pi/4,pi/4] 729 * __kernel_cos ... cose function on [-pi/4,pi/4] 730 * __ieee754_rem_pio2 ... argument reduction routine 731 * 732 * Method. 733 * Let S,C and T denote the sin, cos and tan respectively on 734 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 735 * in [-pi/4 , +pi/4], and let n = k mod 4. 736 * We have 737 * 738 * n sin(x) cos(x) tan(x) 739 * ---------------------------------------------------------- 740 * 0 S C T 741 * 1 C -S -1/T 742 * 2 -S -C T 743 * 3 -C S -1/T 744 * ---------------------------------------------------------- 745 * 746 * Special cases: 747 * Let trig be any of sin, cos, or tan. 748 * trig(+-INF) is NaN, with signals; 749 * trig(NaN) is that NaN; 750 * 751 * Accuracy: 752 * TRIG(x) returns trig(x) nearly rounded 753 */ 754 755 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x)) 756 double y[2],z=0.0; 757 int n, ix; 758 759 /* High word of x. */ 760 ix = high(x); 761 762 /* |x| ~< pi/4 */ 763 ix &= 0x7fffffff; 764 if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0); 765 766 /* sin(Inf or NaN) is NaN */ 767 else if (ix>=0x7ff00000) return x-x; 768 769 /* argument reduction needed */ 770 else { 771 n = __ieee754_rem_pio2(x,y); 772 switch(n&3) { 773 case 0: return __kernel_sin(y[0],y[1],1); 774 case 1: return __kernel_cos(y[0],y[1]); 775 case 2: return -__kernel_sin(y[0],y[1],1); 776 default: 777 return -__kernel_cos(y[0],y[1]); 778 } 779 } 780 JRT_END 781 782 /* cos(x) 783 * Return cosine function of x. 784 * 785 * kernel function: 786 * __kernel_sin ... sine function on [-pi/4,pi/4] 787 * __kernel_cos ... cosine function on [-pi/4,pi/4] 788 * __ieee754_rem_pio2 ... argument reduction routine 789 * 790 * Method. 791 * Let S,C and T denote the sin, cos and tan respectively on 792 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 793 * in [-pi/4 , +pi/4], and let n = k mod 4. 794 * We have 795 * 796 * n sin(x) cos(x) tan(x) 797 * ---------------------------------------------------------- 798 * 0 S C T 799 * 1 C -S -1/T 800 * 2 -S -C T 801 * 3 -C S -1/T 802 * ---------------------------------------------------------- 803 * 804 * Special cases: 805 * Let trig be any of sin, cos, or tan. 806 * trig(+-INF) is NaN, with signals; 807 * trig(NaN) is that NaN; 808 * 809 * Accuracy: 810 * TRIG(x) returns trig(x) nearly rounded 811 */ 812 813 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x)) 814 double y[2],z=0.0; 815 int n, ix; 816 817 /* High word of x. */ 818 ix = high(x); 819 820 /* |x| ~< pi/4 */ 821 ix &= 0x7fffffff; 822 if(ix <= 0x3fe921fb) return __kernel_cos(x,z); 823 824 /* cos(Inf or NaN) is NaN */ 825 else if (ix>=0x7ff00000) return x-x; 826 827 /* argument reduction needed */ 828 else { 829 n = __ieee754_rem_pio2(x,y); 830 switch(n&3) { 831 case 0: return __kernel_cos(y[0],y[1]); 832 case 1: return -__kernel_sin(y[0],y[1],1); 833 case 2: return -__kernel_cos(y[0],y[1]); 834 default: 835 return __kernel_sin(y[0],y[1],1); 836 } 837 } 838 JRT_END 839 840 /* tan(x) 841 * Return tangent function of x. 842 * 843 * kernel function: 844 * __kernel_tan ... tangent function on [-pi/4,pi/4] 845 * __ieee754_rem_pio2 ... argument reduction routine 846 * 847 * Method. 848 * Let S,C and T denote the sin, cos and tan respectively on 849 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 850 * in [-pi/4 , +pi/4], and let n = k mod 4. 851 * We have 852 * 853 * n sin(x) cos(x) tan(x) 854 * ---------------------------------------------------------- 855 * 0 S C T 856 * 1 C -S -1/T 857 * 2 -S -C T 858 * 3 -C S -1/T 859 * ---------------------------------------------------------- 860 * 861 * Special cases: 862 * Let trig be any of sin, cos, or tan. 863 * trig(+-INF) is NaN, with signals; 864 * trig(NaN) is that NaN; 865 * 866 * Accuracy: 867 * TRIG(x) returns trig(x) nearly rounded 868 */ 869 870 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x)) 871 double y[2],z=0.0; 872 int n, ix; 873 874 /* High word of x. */ 875 ix = high(x); 876 877 /* |x| ~< pi/4 */ 878 ix &= 0x7fffffff; 879 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1); 880 881 /* tan(Inf or NaN) is NaN */ 882 else if (ix>=0x7ff00000) return x-x; /* NaN */ 883 884 /* argument reduction needed */ 885 else { 886 n = __ieee754_rem_pio2(x,y); 887 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even 888 -1 -- n odd */ 889 } 890 JRT_END