src/share/vm/runtime/sharedRuntimeTrig.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File bug_jdk6961433 Sdiff src/share/vm/runtime

src/share/vm/runtime/sharedRuntimeTrig.cpp

Print this page




   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "prims/jni.h"
  27 #include "runtime/interfaceSupport.hpp"
  28 #include "runtime/sharedRuntime.hpp"

  29 
  30 // This file contains copies of the fdlibm routines used by
  31 // StrictMath. It turns out that it is almost always required to use
  32 // these runtime routines; the Intel CPU doesn't meet the Java
  33 // specification for sin/cos outside a certain limited argument range,
  34 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
  35 // also turns out that avoiding the indirect call through function
  36 // pointer out to libjava.so in SharedRuntime speeds these routines up
  37 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
  38 
  39 // Enabling optimizations in this file causes incorrect code to be
  40 // generated; can not figure out how to turn down optimization for one
  41 // file in the IDE on Windows
  42 #ifdef WIN32
  43 # pragma optimize ( "", off )
  44 #endif
  45 
  46 /* The above workaround now causes more problems with the latest MS compiler.
  47  * Visual Studio 2010's /GS option tries to guard against buffer overruns.
  48  * /GS is on by default if you specify optimizations, which we do globally
  49  * via /W3 /O2. However the above selective turning off of optimizations means
  50  * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
  51  * then the compilation fails. There are several possible solutions
  52  * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
  53  * (2) Stop treating warnings as errors - would be a backward step
  54  * (3) Disable /GS - may help performance but you lose the security checks
  55  * (4) Disable the warning with "#pragma warning( disable : 4748 )"
  56  * (5) Disable planting the code with  __declspec(safebuffers)
  57  * I've opted for (5) although we should investigate the local performance
  58  * benefits of (1) and global performance benefit of (3).
  59  */
  60 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
  61 #define SAFEBUF __declspec(safebuffers)
  62 #else
  63 #define SAFEBUF
  64 #endif
  65 
  66 #include "runtime/sharedRuntimeMath.hpp"
  67 
  68 /*
  69  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
  70  * double x[],y[]; int e0,nx,prec; int ipio2[];
  71  *
  72  * __kernel_rem_pio2 return the last three digits of N with
  73  *              y = x - N*pi/2
  74  * so that |y| < pi/2.
  75  *
  76  * The method is to compute the integer (mod 8) and fraction parts of
  77  * (2/pi)*x without doing the full multiplication. In general we
  78  * skip the part of the product that are known to be a huge integer (
  79  * more accurately, = 0 mod 8 ). Thus the number of operations are
  80  * independent of the exponent of the input.
  81  *
  82  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
  83  *
  84  * Input parameters:
  85  *      x[]     The input value (must be positive) is broken into nx
  86  *              pieces of 24-bit integers in double precision format.
  87  *              x[i] will be the i-th 24 bit of x. The scaled exponent


 184 
 185 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
 186 
 187 static const double PIo2[] = {
 188   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
 189   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
 190   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
 191   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
 192   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
 193   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
 194   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
 195   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
 196 };
 197 
 198 static const double
 199 zeroB   = 0.0,
 200 one     = 1.0,
 201 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
 202 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
 203 
 204 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
 205   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
 206   double z,fw,f[20],fq[20],q[20];
 207 
 208   /* initialize jk*/
 209   jk = init_jk[prec];
 210   jp = jk;
 211 
 212   /* determine jx,jv,q0, note that 3>q0 */
 213   jx =  nx-1;
 214   jv = (e0-3)/24; if(jv<0) jv=0;
 215   q0 =  e0-24*(jv+1);
 216 
 217   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 218   j = jv-jx; m = jx+jk;
 219   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
 220 
 221   /* compute q[0],q[1],...q[jk] */
 222   for (i=0;i<=jk;i++) {
 223     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
 224   }


 400  * pio2_1:   first  33 bit of pi/2
 401  * pio2_1t:  pi/2 - pio2_1
 402  * pio2_2:   second 33 bit of pi/2
 403  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
 404  * pio2_3:   third  33 bit of pi/2
 405  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
 406  */
 407 
 408 static const double
 409 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
 410 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
 411 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
 412 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
 413 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
 414 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
 415 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
 416 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
 417 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
 418 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
 419 
 420 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
 421   double z,w,t,r,fn;
 422   double tx[3];
 423   int e0,i,j,nx,n,ix,hx,i0;
 424 
 425   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
 426   hx = *(i0+(int*)&x);          /* high word of x */
 427   ix = hx&0x7fffffff;
 428   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
 429     {y[0] = x; y[1] = 0; return 0;}
 430   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
 431     if(hx>0) {
 432       z = x - pio2_1;
 433       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
 434         y[0] = z - pio2_1t;
 435         y[1] = (z-y[0])-pio2_1t;
 436       } else {                /* near pi/2, use 33+33+53 bit pi */
 437         z -= pio2_2;
 438         y[0] = z - pio2_2t;
 439         y[1] = (z-y[0])-pio2_2t;
 440       }


 899   double y[2],z=0.0;
 900   int n, ix;
 901 
 902   /* High word of x. */
 903   ix = __HI(x);
 904 
 905   /* |x| ~< pi/4 */
 906   ix &= 0x7fffffff;
 907   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
 908 
 909   /* tan(Inf or NaN) is NaN */
 910   else if (ix>=0x7ff00000) return x-x;            /* NaN */
 911 
 912   /* argument reduction needed */
 913   else {
 914     n = __ieee754_rem_pio2(x,y);
 915     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
 916                                                      -1 -- n odd */
 917   }
 918 JRT_END
 919 
 920 
 921 #ifdef WIN32
 922 # pragma optimize ( "", on )
 923 #endif


   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "prims/jni.h"
  27 #include "runtime/interfaceSupport.hpp"
  28 #include "runtime/sharedRuntime.hpp"
  29 #include "runtime/sharedRuntimeMath.hpp"
  30 
  31 // This file contains copies of the fdlibm routines used by
  32 // StrictMath. It turns out that it is almost always required to use
  33 // these runtime routines; the Intel CPU doesn't meet the Java
  34 // specification for sin/cos outside a certain limited argument range,
  35 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
  36 // also turns out that avoiding the indirect call through function
  37 // pointer out to libjava.so in SharedRuntime speeds these routines up
  38 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
  39 





























  40 /*
  41  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
  42  * double x[],y[]; int e0,nx,prec; int ipio2[];
  43  *
  44  * __kernel_rem_pio2 return the last three digits of N with
  45  *              y = x - N*pi/2
  46  * so that |y| < pi/2.
  47  *
  48  * The method is to compute the integer (mod 8) and fraction parts of
  49  * (2/pi)*x without doing the full multiplication. In general we
  50  * skip the part of the product that are known to be a huge integer (
  51  * more accurately, = 0 mod 8 ). Thus the number of operations are
  52  * independent of the exponent of the input.
  53  *
  54  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
  55  *
  56  * Input parameters:
  57  *      x[]     The input value (must be positive) is broken into nx
  58  *              pieces of 24-bit integers in double precision format.
  59  *              x[i] will be the i-th 24 bit of x. The scaled exponent


 156 
 157 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
 158 
 159 static const double PIo2[] = {
 160   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
 161   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
 162   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
 163   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
 164   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
 165   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
 166   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
 167   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
 168 };
 169 
 170 static const double
 171 zeroB   = 0.0,
 172 one     = 1.0,
 173 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
 174 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
 175 
 176 static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
 177   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
 178   double z,fw,f[20],fq[20],q[20];
 179 
 180   /* initialize jk*/
 181   jk = init_jk[prec];
 182   jp = jk;
 183 
 184   /* determine jx,jv,q0, note that 3>q0 */
 185   jx =  nx-1;
 186   jv = (e0-3)/24; if(jv<0) jv=0;
 187   q0 =  e0-24*(jv+1);
 188 
 189   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 190   j = jv-jx; m = jx+jk;
 191   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
 192 
 193   /* compute q[0],q[1],...q[jk] */
 194   for (i=0;i<=jk;i++) {
 195     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
 196   }


 372  * pio2_1:   first  33 bit of pi/2
 373  * pio2_1t:  pi/2 - pio2_1
 374  * pio2_2:   second 33 bit of pi/2
 375  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
 376  * pio2_3:   third  33 bit of pi/2
 377  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
 378  */
 379 
 380 static const double
 381 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
 382 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
 383 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
 384 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
 385 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
 386 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
 387 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
 388 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
 389 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
 390 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
 391 
 392 static int __ieee754_rem_pio2(double x, double *y) {
 393   double z,w,t,r,fn;
 394   double tx[3];
 395   int e0,i,j,nx,n,ix,hx,i0;
 396 
 397   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
 398   hx = *(i0+(int*)&x);          /* high word of x */
 399   ix = hx&0x7fffffff;
 400   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
 401     {y[0] = x; y[1] = 0; return 0;}
 402   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
 403     if(hx>0) {
 404       z = x - pio2_1;
 405       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
 406         y[0] = z - pio2_1t;
 407         y[1] = (z-y[0])-pio2_1t;
 408       } else {                /* near pi/2, use 33+33+53 bit pi */
 409         z -= pio2_2;
 410         y[0] = z - pio2_2t;
 411         y[1] = (z-y[0])-pio2_2t;
 412       }


 871   double y[2],z=0.0;
 872   int n, ix;
 873 
 874   /* High word of x. */
 875   ix = __HI(x);
 876 
 877   /* |x| ~< pi/4 */
 878   ix &= 0x7fffffff;
 879   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
 880 
 881   /* tan(Inf or NaN) is NaN */
 882   else if (ix>=0x7ff00000) return x-x;            /* NaN */
 883 
 884   /* argument reduction needed */
 885   else {
 886     n = __ieee754_rem_pio2(x,y);
 887     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
 888                                                      -1 -- n odd */
 889   }
 890 JRT_END





src/share/vm/runtime/sharedRuntimeTrig.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File