New src/hotspot/share/runtime/sharedRuntimeTrig.cpp

   1 /*
   2  * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jni.h"
  27 #include "runtime/interfaceSupport.inline.hpp"
  28 #include "runtime/sharedRuntime.hpp"
  29 #include "runtime/sharedRuntimeMath.hpp"
  30 
  31 // This file contains copies of the fdlibm routines used by
  32 // StrictMath. It turns out that it is almost always required to use
  33 // these runtime routines; the Intel CPU doesn't meet the Java
  34 // specification for sin/cos outside a certain limited argument range,
  35 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
  36 // also turns out that avoiding the indirect call through function
  37 // pointer out to libjava.so in SharedRuntime speeds these routines up
  38 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
  39 
  40 /*
  41  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
  42  * double x[],y[]; int e0,nx,prec; int ipio2[];
  43  *
  44  * __kernel_rem_pio2 return the last three digits of N with
  45  *              y = x - N*pi/2
  46  * so that |y| < pi/2.
  47  *
  48  * The method is to compute the integer (mod 8) and fraction parts of
  49  * (2/pi)*x without doing the full multiplication. In general we
  50  * skip the part of the product that are known to be a huge integer (
  51  * more accurately, = 0 mod 8 ). Thus the number of operations are
  52  * independent of the exponent of the input.
  53  *
  54  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
  55  *
  56  * Input parameters:
  57  *      x[]     The input value (must be positive) is broken into nx
  58  *              pieces of 24-bit integers in double precision format.
  59  *              x[i] will be the i-th 24 bit of x. The scaled exponent
  60  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
  61  *              match x's up to 24 bits.
  62  *
  63  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
  64  *                      e0 = ilogb(z)-23
  65  *                      z  = scalbn(z,-e0)
  66  *              for i = 0,1,2
  67  *                      x[i] = floor(z)
  68  *                      z    = (z-x[i])*2**24
  69  *
  70  *
  71  *      y[]     ouput result in an array of double precision numbers.
  72  *              The dimension of y[] is:
  73  *                      24-bit  precision       1
  74  *                      53-bit  precision       2
  75  *                      64-bit  precision       2
  76  *                      113-bit precision       3
  77  *              The actual value is the sum of them. Thus for 113-bit
  78  *              precsion, one may have to do something like:
  79  *
  80  *              long double t,w,r_head, r_tail;
  81  *              t = (long double)y[2] + (long double)y[1];
  82  *              w = (long double)y[0];
  83  *              r_head = t+w;
  84  *              r_tail = w - (r_head - t);
  85  *
  86  *      e0      The exponent of x[0]
  87  *
  88  *      nx      dimension of x[]
  89  *
  90  *      prec    an interger indicating the precision:
  91  *                      0       24  bits (single)
  92  *                      1       53  bits (double)
  93  *                      2       64  bits (extended)
  94  *                      3       113 bits (quad)
  95  *
  96  *      ipio2[]
  97  *              integer array, contains the (24*i)-th to (24*i+23)-th
  98  *              bit of 2/pi after binary point. The corresponding
  99  *              floating value is
 100  *
 101  *                      ipio2[i] * 2^(-24(i+1)).
 102  *
 103  * External function:
 104  *      double scalbn(), floor();
 105  *
 106  *
 107  * Here is the description of some local variables:
 108  *
 109  *      jk      jk+1 is the initial number of terms of ipio2[] needed
 110  *              in the computation. The recommended value is 2,3,4,
 111  *              6 for single, double, extended,and quad.
 112  *
 113  *      jz      local integer variable indicating the number of
 114  *              terms of ipio2[] used.
 115  *
 116  *      jx      nx - 1
 117  *
 118  *      jv      index for pointing to the suitable ipio2[] for the
 119  *              computation. In general, we want
 120  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
 121  *              is an integer. Thus
 122  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
 123  *              Hence jv = max(0,(e0-3)/24).
 124  *
 125  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
 126  *
 127  *      q[]     double array with integral value, representing the
 128  *              24-bits chunk of the product of x and 2/pi.
 129  *
 130  *      q0      the corresponding exponent of q[0]. Note that the
 131  *              exponent for q[i] would be q0-24*i.
 132  *
 133  *      PIo2[]  double precision array, obtained by cutting pi/2
 134  *              into 24 bits chunks.
 135  *
 136  *      f[]     ipio2[] in floating point
 137  *
 138  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
 139  *
 140  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
 141  *
 142  *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
 143  *              it also indicates the *sign* of the result.
 144  *
 145  */
 146 
 147 
 148 /*
 149  * Constants:
 150  * The hexadecimal values are the intended ones for the following
 151  * constants. The decimal values may be used, provided that the
 152  * compiler will convert from decimal to binary accurately enough
 153  * to produce the hexadecimal values shown.
 154  */
 155 
 156 
 157 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
 158 
 159 static const double PIo2[] = {
 160   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
 161   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
 162   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
 163   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
 164   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
 165   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
 166   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
 167   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
 168 };
 169 
 170 static const double
 171 zeroB   = 0.0,
 172 one     = 1.0,
 173 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
 174 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
 175 
 176 static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
 177   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
 178   double z,fw,f[20],fq[20],q[20];
 179 
 180   /* initialize jk*/
 181   jk = init_jk[prec];
 182   jp = jk;
 183 
 184   /* determine jx,jv,q0, note that 3>q0 */
 185   jx =  nx-1;
 186   jv = (e0-3)/24; if(jv<0) jv=0;
 187   q0 =  e0-24*(jv+1);
 188 
 189   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 190   j = jv-jx; m = jx+jk;
 191   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
 192 
 193   /* compute q[0],q[1],...q[jk] */
 194   for (i=0;i<=jk;i++) {
 195     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
 196   }
 197 
 198   jz = jk;
 199 recompute:
 200   /* distill q[] into iq[] reversingly */
 201   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
 202     fw    =  (double)((int)(twon24* z));
 203     iq[i] =  (int)(z-two24B*fw);
 204     z     =  q[j-1]+fw;
 205   }
 206 
 207   /* compute n */
 208   z  = scalbnA(z,q0);           /* actual value of z */
 209   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
 210   n  = (int) z;
 211   z -= (double)n;
 212   ih = 0;
 213   if(q0>0) {    /* need iq[jz-1] to determine n */
 214     i  = (iq[jz-1]>>(24-q0)); n += i;
 215     iq[jz-1] -= i<<(24-q0);
 216     ih = iq[jz-1]>>(23-q0);
 217   }
 218   else if(q0==0) ih = iq[jz-1]>>23;
 219   else if(z>=0.5) ih=2;
 220 
 221   if(ih>0) {    /* q > 0.5 */
 222     n += 1; carry = 0;
 223     for(i=0;i<jz ;i++) {        /* compute 1-q */
 224       j = iq[i];
 225       if(carry==0) {
 226         if(j!=0) {
 227           carry = 1; iq[i] = 0x1000000- j;
 228         }
 229       } else  iq[i] = 0xffffff - j;
 230     }
 231     if(q0>0) {          /* rare case: chance is 1 in 12 */
 232       switch(q0) {
 233       case 1:
 234         iq[jz-1] &= 0x7fffff; break;
 235       case 2:
 236         iq[jz-1] &= 0x3fffff; break;
 237       }
 238     }
 239     if(ih==2) {
 240       z = one - z;
 241       if(carry!=0) z -= scalbnA(one,q0);
 242     }
 243   }
 244 
 245   /* check if recomputation is needed */
 246   if(z==zeroB) {
 247     j = 0;
 248     for (i=jz-1;i>=jk;i--) j |= iq[i];
 249     if(j==0) { /* need recomputation */
 250       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
 251 
 252       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
 253         f[jx+i] = (double) ipio2[jv+i];
 254         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
 255         q[i] = fw;
 256       }
 257       jz += k;
 258       goto recompute;
 259     }
 260   }
 261 
 262   /* chop off zero terms */
 263   if(z==0.0) {
 264     jz -= 1; q0 -= 24;
 265     while(iq[jz]==0) { jz--; q0-=24;}
 266   } else { /* break z into 24-bit if necessary */
 267     z = scalbnA(z,-q0);
 268     if(z>=two24B) {
 269       fw = (double)((int)(twon24*z));
 270       iq[jz] = (int)(z-two24B*fw);
 271       jz += 1; q0 += 24;
 272       iq[jz] = (int) fw;
 273     } else iq[jz] = (int) z ;
 274   }
 275 
 276   /* convert integer "bit" chunk to floating-point value */
 277   fw = scalbnA(one,q0);
 278   for(i=jz;i>=0;i--) {
 279     q[i] = fw*(double)iq[i]; fw*=twon24;
 280   }
 281 
 282   /* compute PIo2[0,...,jp]*q[jz,...,0] */
 283   for(i=jz;i>=0;i--) {
 284     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
 285     fq[jz-i] = fw;
 286   }
 287 
 288   /* compress fq[] into y[] */
 289   switch(prec) {
 290   case 0:
 291     fw = 0.0;
 292     for (i=jz;i>=0;i--) fw += fq[i];
 293     y[0] = (ih==0)? fw: -fw;
 294     break;
 295   case 1:
 296   case 2:
 297     fw = 0.0;
 298     for (i=jz;i>=0;i--) fw += fq[i];
 299     y[0] = (ih==0)? fw: -fw;
 300     fw = fq[0]-fw;
 301     for (i=1;i<=jz;i++) fw += fq[i];
 302     y[1] = (ih==0)? fw: -fw;
 303     break;
 304   case 3:       /* painful */
 305     for (i=jz;i>0;i--) {
 306       fw      = fq[i-1]+fq[i];
 307       fq[i]  += fq[i-1]-fw;
 308       fq[i-1] = fw;
 309     }
 310     for (i=jz;i>1;i--) {
 311       fw      = fq[i-1]+fq[i];
 312       fq[i]  += fq[i-1]-fw;
 313       fq[i-1] = fw;
 314     }
 315     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
 316     if(ih==0) {
 317       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
 318     } else {
 319       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
 320     }
 321   }
 322   return n&7;
 323 }
 324 
 325 
 326 /*
 327  * ====================================================
 328  * Copyright (c) 1993 Oracle and/or its affiliates. All rights reserved.
 329  *
 330  * Developed at SunPro, a Sun Microsystems, Inc. business.
 331  * Permission to use, copy, modify, and distribute this
 332  * software is freely granted, provided that this notice
 333  * is preserved.
 334  * ====================================================
 335  *
 336  */
 337 
 338 /* __ieee754_rem_pio2(x,y)
 339  *
 340  * return the remainder of x rem pi/2 in y[0]+y[1]
 341  * use __kernel_rem_pio2()
 342  */
 343 
 344 /*
 345  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
 346  */
 347 static const int two_over_pi[] = {
 348   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 349   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 350   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
 351   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
 352   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
 353   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
 354   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
 355   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
 356   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
 357   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
 358   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 359 };
 360 
 361 static const int npio2_hw[] = {
 362   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
 363   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
 364   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
 365   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
 366   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
 367   0x404858EB, 0x404921FB,
 368 };
 369 
 370 /*
 371  * invpio2:  53 bits of 2/pi
 372  * pio2_1:   first  33 bit of pi/2
 373  * pio2_1t:  pi/2 - pio2_1
 374  * pio2_2:   second 33 bit of pi/2
 375  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
 376  * pio2_3:   third  33 bit of pi/2
 377  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
 378  */
 379 
 380 static const double
 381 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
 382 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
 383 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
 384 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
 385 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
 386 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
 387 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
 388 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
 389 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
 390 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
 391 
 392 static int __ieee754_rem_pio2(double x, double *y) {
 393   double z,w,t,r,fn;
 394   double tx[3];
 395   int e0,i,j,nx,n,ix,hx,i0;
 396 
 397   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
 398   hx = *(i0+(int*)&x);          /* high word of x */
 399   ix = hx&0x7fffffff;
 400   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
 401     {y[0] = x; y[1] = 0; return 0;}
 402   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
 403     if(hx>0) {
 404       z = x - pio2_1;
 405       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
 406         y[0] = z - pio2_1t;
 407         y[1] = (z-y[0])-pio2_1t;
 408       } else {                /* near pi/2, use 33+33+53 bit pi */
 409         z -= pio2_2;
 410         y[0] = z - pio2_2t;
 411         y[1] = (z-y[0])-pio2_2t;
 412       }
 413       return 1;
 414     } else {    /* negative x */
 415       z = x + pio2_1;
 416       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
 417         y[0] = z + pio2_1t;
 418         y[1] = (z-y[0])+pio2_1t;
 419       } else {                /* near pi/2, use 33+33+53 bit pi */
 420         z += pio2_2;
 421         y[0] = z + pio2_2t;
 422         y[1] = (z-y[0])+pio2_2t;
 423       }
 424       return -1;
 425     }
 426   }
 427   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
 428     t  = fabsd(x);
 429     n  = (int) (t*invpio2+half);
 430     fn = (double)n;
 431     r  = t-fn*pio2_1;
 432     w  = fn*pio2_1t;    /* 1st round good to 85 bit */
 433     if(n<32&&ix!=npio2_hw[n-1]) {
 434       y[0] = r-w;       /* quick check no cancellation */
 435     } else {
 436       j  = ix>>20;
 437       y[0] = r-w;
 438       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
 439       if(i>16) {  /* 2nd iteration needed, good to 118 */
 440         t  = r;
 441         w  = fn*pio2_2;
 442         r  = t-w;
 443         w  = fn*pio2_2t-((t-r)-w);
 444         y[0] = r-w;
 445         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
 446         if(i>49)  {     /* 3rd iteration need, 151 bits acc */
 447           t  = r;       /* will cover all possible cases */
 448           w  = fn*pio2_3;
 449           r  = t-w;
 450           w  = fn*pio2_3t-((t-r)-w);
 451           y[0] = r-w;
 452         }
 453       }
 454     }
 455     y[1] = (r-y[0])-w;
 456     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
 457     else         return n;
 458   }
 459   /*
 460    * all other (large) arguments
 461    */
 462   if(ix>=0x7ff00000) {          /* x is inf or NaN */
 463     y[0]=y[1]=x-x; return 0;
 464   }
 465   /* set z = scalbn(|x|,ilogb(x)-23) */
 466   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
 467   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
 468   *(i0+(int*)&z) = ix - (e0<<20);
 469   for(i=0;i<2;i++) {
 470     tx[i] = (double)((int)(z));
 471     z     = (z-tx[i])*two24A;
 472   }
 473   tx[2] = z;
 474   nx = 3;
 475   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
 476   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
 477   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
 478   return n;
 479 }
 480 
 481 
 482 /* __kernel_sin( x, y, iy)
 483  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
 484  * Input x is assumed to be bounded by ~pi/4 in magnitude.
 485  * Input y is the tail of x.
 486  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
 487  *
 488  * Algorithm
 489  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
 490  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
 491  *      3. sin(x) is approximated by a polynomial of degree 13 on
 492  *         [0,pi/4]
 493  *                               3            13
 494  *              sin(x) ~ x + S1*x + ... + S6*x
 495  *         where
 496  *
 497  *      |sin(x)         2     4     6     8     10     12  |     -58
 498  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
 499  *      |  x                                               |
 500  *
 501  *      4. sin(x+y) = sin(x) + sin'(x')*y
 502  *                  ~ sin(x) + (1-x*x/2)*y
 503  *         For better accuracy, let
 504  *                   3      2      2      2      2
 505  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
 506  *         then                   3    2
 507  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
 508  */
 509 
 510 static const double
 511 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
 512 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
 513 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
 514 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
 515 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
 516 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
 517 
 518 static double __kernel_sin(double x, double y, int iy)
 519 {
 520         double z,r,v;
 521         int ix;
 522         ix = high(x)&0x7fffffff;                /* high word of x */
 523         if(ix<0x3e400000)                       /* |x| < 2**-27 */
 524            {if((int)x==0) return x;}            /* generate inexact */
 525         z       =  x*x;
 526         v       =  z*x;
 527         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
 528         if(iy==0) return x+v*(S1+z*r);
 529         else      return x-((z*(half*y-v*r)-y)-v*S1);
 530 }
 531 
 532 /*
 533  * __kernel_cos( x,  y )
 534  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
 535  * Input x is assumed to be bounded by ~pi/4 in magnitude.
 536  * Input y is the tail of x.
 537  *
 538  * Algorithm
 539  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
 540  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
 541  *      3. cos(x) is approximated by a polynomial of degree 14 on
 542  *         [0,pi/4]
 543  *                                       4            14
 544  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
 545  *         where the remez error is
 546  *
 547  *      |              2     4     6     8     10    12     14 |     -58
 548  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
 549  *      |                                                      |
 550  *
 551  *                     4     6     8     10    12     14
 552  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
 553  *             cos(x) = 1 - x*x/2 + r
 554  *         since cos(x+y) ~ cos(x) - sin(x)*y
 555  *                        ~ cos(x) - x*y,
 556  *         a correction term is necessary in cos(x) and hence
 557  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
 558  *         For better accuracy when x > 0.3, let qx = |x|/4 with
 559  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
 560  *         Then
 561  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
 562  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
 563  *         magnitude of the latter is at least a quarter of x*x/2,
 564  *         thus, reducing the rounding error in the subtraction.
 565  */
 566 
 567 static const double
 568 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
 569 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
 570 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
 571 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
 572 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
 573 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
 574 
 575 static double __kernel_cos(double x, double y)
 576 {
 577   double a,h,z,r,qx=0;
 578   int ix;
 579   ix = high(x)&0x7fffffff;              /* ix = |x|'s high word*/
 580   if(ix<0x3e400000) {                   /* if x < 2**27 */
 581     if(((int)x)==0) return one;         /* generate inexact */
 582   }
 583   z  = x*x;
 584   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
 585   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
 586     return one - (0.5*z - (z*r - x*y));
 587   else {
 588     if(ix > 0x3fe90000) {               /* x > 0.78125 */
 589       qx = 0.28125;
 590     } else {
 591       set_high(&qx, ix-0x00200000); /* x/4 */
 592       set_low(&qx, 0);
 593     }
 594     h = 0.5*z-qx;
 595     a = one-qx;
 596     return a - (h - (z*r-x*y));
 597   }
 598 }
 599 
 600 /* __kernel_tan( x, y, k )
 601  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
 602  * Input x is assumed to be bounded by ~pi/4 in magnitude.
 603  * Input y is the tail of x.
 604  * Input k indicates whether tan (if k=1) or
 605  * -1/tan (if k= -1) is returned.
 606  *
 607  * Algorithm
 608  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
 609  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
 610  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
 611  *         [0,0.67434]
 612  *                               3             27
 613  *              tan(x) ~ x + T1*x + ... + T13*x
 614  *         where
 615  *
 616  *              |tan(x)         2     4            26   |     -59.2
 617  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
 618  *              |  x                                    |
 619  *
 620  *         Note: tan(x+y) = tan(x) + tan'(x)*y
 621  *                        ~ tan(x) + (1+x*x)*y
 622  *         Therefore, for better accuracy in computing tan(x+y), let
 623  *                   3      2      2       2       2
 624  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
 625  *         then
 626  *                                  3    2
 627  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
 628  *
 629  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
 630  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
 631  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
 632  */
 633 
 634 static const double
 635 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
 636 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
 637 T[] =  {
 638   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
 639   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
 640   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
 641   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
 642   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
 643   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
 644   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
 645   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
 646   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
 647   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
 648   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
 649  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
 650   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
 651 };
 652 
 653 static double __kernel_tan(double x, double y, int iy)
 654 {
 655   double z,r,v,w,s;
 656   int ix,hx;
 657   hx = high(x);           /* high word of x */
 658   ix = hx&0x7fffffff;     /* high word of |x| */
 659   if(ix<0x3e300000) {                     /* x < 2**-28 */
 660     if((int)x==0) {                       /* generate inexact */
 661       if (((ix | low(x)) | (iy + 1)) == 0)
 662         return one / fabsd(x);
 663       else {
 664         if (iy == 1)
 665           return x;
 666         else {    /* compute -1 / (x+y) carefully */
 667           double a, t;
 668 
 669           z = w = x + y;
 670           set_low(&z, 0);
 671           v = y - (z - x);
 672           t = a = -one / w;
 673           set_low(&t, 0);
 674           s = one + t * z;
 675           return t + a * (s + t * v);
 676         }
 677       }
 678     }
 679   }
 680   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
 681     if(hx<0) {x = -x; y = -y;}
 682     z = pio4-x;
 683     w = pio4lo-y;
 684     x = z+w; y = 0.0;
 685   }
 686   z       =  x*x;
 687   w       =  z*z;
 688   /* Break x^5*(T[1]+x^2*T[2]+...) into
 689    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
 690    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
 691    */
 692   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
 693   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
 694   s = z*x;
 695   r = y + z*(s*(r+v)+y);
 696   r += T[0]*s;
 697   w = x+r;
 698   if(ix>=0x3FE59428) {
 699     v = (double)iy;
 700     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
 701   }
 702   if(iy==1) return w;
 703   else {          /* if allow error up to 2 ulp,
 704                      simply return -1.0/(x+r) here */
 705     /*  compute -1.0/(x+r) accurately */
 706     double a,t;
 707     z  = w;
 708     set_low(&z, 0);
 709     v  = r-(z - x);     /* z+v = r+x */
 710     t = a  = -1.0/w;    /* a = -1.0/w */
 711     set_low(&t, 0);
 712     s  = 1.0+t*z;
 713     return t+a*(s+t*v);
 714   }
 715 }
 716 
 717 
 718 //----------------------------------------------------------------------
 719 //
 720 // Routines for new sin/cos implementation
 721 //
 722 //----------------------------------------------------------------------
 723 
 724 /* sin(x)
 725  * Return sine function of x.
 726  *
 727  * kernel function:
 728  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
 729  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
 730  *      __ieee754_rem_pio2      ... argument reduction routine
 731  *
 732  * Method.
 733  *      Let S,C and T denote the sin, cos and tan respectively on
 734  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
 735  *      in [-pi/4 , +pi/4], and let n = k mod 4.
 736  *      We have
 737  *
 738  *          n        sin(x)      cos(x)        tan(x)
 739  *     ----------------------------------------------------------
 740  *          0          S           C             T
 741  *          1          C          -S            -1/T
 742  *          2         -S          -C             T
 743  *          3         -C           S            -1/T
 744  *     ----------------------------------------------------------
 745  *
 746  * Special cases:
 747  *      Let trig be any of sin, cos, or tan.
 748  *      trig(+-INF)  is NaN, with signals;
 749  *      trig(NaN)    is that NaN;
 750  *
 751  * Accuracy:
 752  *      TRIG(x) returns trig(x) nearly rounded
 753  */
 754 
 755 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
 756   double y[2],z=0.0;
 757   int n, ix;
 758 
 759   /* High word of x. */
 760   ix = high(x);
 761 
 762   /* |x| ~< pi/4 */
 763   ix &= 0x7fffffff;
 764   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
 765 
 766   /* sin(Inf or NaN) is NaN */
 767   else if (ix>=0x7ff00000) return x-x;
 768 
 769   /* argument reduction needed */
 770   else {
 771     n = __ieee754_rem_pio2(x,y);
 772     switch(n&3) {
 773     case 0: return  __kernel_sin(y[0],y[1],1);
 774     case 1: return  __kernel_cos(y[0],y[1]);
 775     case 2: return -__kernel_sin(y[0],y[1],1);
 776     default:
 777       return -__kernel_cos(y[0],y[1]);
 778     }
 779   }
 780 JRT_END
 781 
 782 /* cos(x)
 783  * Return cosine function of x.
 784  *
 785  * kernel function:
 786  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
 787  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
 788  *      __ieee754_rem_pio2      ... argument reduction routine
 789  *
 790  * Method.
 791  *      Let S,C and T denote the sin, cos and tan respectively on
 792  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
 793  *      in [-pi/4 , +pi/4], and let n = k mod 4.
 794  *      We have
 795  *
 796  *          n        sin(x)      cos(x)        tan(x)
 797  *     ----------------------------------------------------------
 798  *          0          S           C             T
 799  *          1          C          -S            -1/T
 800  *          2         -S          -C             T
 801  *          3         -C           S            -1/T
 802  *     ----------------------------------------------------------
 803  *
 804  * Special cases:
 805  *      Let trig be any of sin, cos, or tan.
 806  *      trig(+-INF)  is NaN, with signals;
 807  *      trig(NaN)    is that NaN;
 808  *
 809  * Accuracy:
 810  *      TRIG(x) returns trig(x) nearly rounded
 811  */
 812 
 813 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
 814   double y[2],z=0.0;
 815   int n, ix;
 816 
 817   /* High word of x. */
 818   ix = high(x);
 819 
 820   /* |x| ~< pi/4 */
 821   ix &= 0x7fffffff;
 822   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
 823 
 824   /* cos(Inf or NaN) is NaN */
 825   else if (ix>=0x7ff00000) return x-x;
 826 
 827   /* argument reduction needed */
 828   else {
 829     n = __ieee754_rem_pio2(x,y);
 830     switch(n&3) {
 831     case 0: return  __kernel_cos(y[0],y[1]);
 832     case 1: return -__kernel_sin(y[0],y[1],1);
 833     case 2: return -__kernel_cos(y[0],y[1]);
 834     default:
 835       return  __kernel_sin(y[0],y[1],1);
 836     }
 837   }
 838 JRT_END
 839 
 840 /* tan(x)
 841  * Return tangent function of x.
 842  *
 843  * kernel function:
 844  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
 845  *      __ieee754_rem_pio2      ... argument reduction routine
 846  *
 847  * Method.
 848  *      Let S,C and T denote the sin, cos and tan respectively on
 849  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
 850  *      in [-pi/4 , +pi/4], and let n = k mod 4.
 851  *      We have
 852  *
 853  *          n        sin(x)      cos(x)        tan(x)
 854  *     ----------------------------------------------------------
 855  *          0          S           C             T
 856  *          1          C          -S            -1/T
 857  *          2         -S          -C             T
 858  *          3         -C           S            -1/T
 859  *     ----------------------------------------------------------
 860  *
 861  * Special cases:
 862  *      Let trig be any of sin, cos, or tan.
 863  *      trig(+-INF)  is NaN, with signals;
 864  *      trig(NaN)    is that NaN;
 865  *
 866  * Accuracy:
 867  *      TRIG(x) returns trig(x) nearly rounded
 868  */
 869 
 870 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
 871   double y[2],z=0.0;
 872   int n, ix;
 873 
 874   /* High word of x. */
 875   ix = high(x);
 876 
 877   /* |x| ~< pi/4 */
 878   ix &= 0x7fffffff;
 879   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
 880 
 881   /* tan(Inf or NaN) is NaN */
 882   else if (ix>=0x7ff00000) return x-x;            /* NaN */
 883 
 884   /* argument reduction needed */
 885   else {
 886     n = __ieee754_rem_pio2(x,y);
 887     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
 888                                                      -1 -- n odd */
 889   }
 890 JRT_END