9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "prims/jni.h"
27 #include "runtime/interfaceSupport.hpp"
28 #include "runtime/sharedRuntime.hpp"
29
30 // This file contains copies of the fdlibm routines used by
31 // StrictMath. It turns out that it is almost always required to use
32 // these runtime routines; the Intel CPU doesn't meet the Java
33 // specification for sin/cos outside a certain limited argument range,
34 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
35 // also turns out that avoiding the indirect call through function
36 // pointer out to libjava.so in SharedRuntime speeds these routines up
37 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
38
39 // Enabling optimizations in this file causes incorrect code to be
40 // generated; can not figure out how to turn down optimization for one
41 // file in the IDE on Windows
42 #ifdef WIN32
43 # pragma optimize ( "", off )
44 #endif
45
46 /* The above workaround now causes more problems with the latest MS compiler.
47 * Visual Studio 2010's /GS option tries to guard against buffer overruns.
48 * /GS is on by default if you specify optimizations, which we do globally
49 * via /W3 /O2. However the above selective turning off of optimizations means
50 * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
51 * then the compilation fails. There are several possible solutions
52 * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
53 * (2) Stop treating warnings as errors - would be a backward step
54 * (3) Disable /GS - may help performance but you lose the security checks
55 * (4) Disable the warning with "#pragma warning( disable : 4748 )"
56 * (5) Disable planting the code with __declspec(safebuffers)
57 * I've opted for (5) although we should investigate the local performance
58 * benefits of (1) and global performance benefit of (3).
59 */
60 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
61 #define SAFEBUF __declspec(safebuffers)
62 #else
63 #define SAFEBUF
64 #endif
65
66 #include "runtime/sharedRuntimeMath.hpp"
67
68 /*
69 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
70 * double x[],y[]; int e0,nx,prec; int ipio2[];
71 *
72 * __kernel_rem_pio2 return the last three digits of N with
73 * y = x - N*pi/2
74 * so that |y| < pi/2.
75 *
76 * The method is to compute the integer (mod 8) and fraction parts of
77 * (2/pi)*x without doing the full multiplication. In general we
78 * skip the part of the product that are known to be a huge integer (
79 * more accurately, = 0 mod 8 ). Thus the number of operations are
80 * independent of the exponent of the input.
81 *
82 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
83 *
84 * Input parameters:
85 * x[] The input value (must be positive) is broken into nx
86 * pieces of 24-bit integers in double precision format.
87 * x[i] will be the i-th 24 bit of x. The scaled exponent
184
185 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
186
187 static const double PIo2[] = {
188 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
189 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
190 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
191 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
192 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
193 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
194 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
195 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
196 };
197
198 static const double
199 zeroB = 0.0,
200 one = 1.0,
201 two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
202 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
203
204 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
205 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
206 double z,fw,f[20],fq[20],q[20];
207
208 /* initialize jk*/
209 jk = init_jk[prec];
210 jp = jk;
211
212 /* determine jx,jv,q0, note that 3>q0 */
213 jx = nx-1;
214 jv = (e0-3)/24; if(jv<0) jv=0;
215 q0 = e0-24*(jv+1);
216
217 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
218 j = jv-jx; m = jx+jk;
219 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
220
221 /* compute q[0],q[1],...q[jk] */
222 for (i=0;i<=jk;i++) {
223 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
224 }
400 * pio2_1: first 33 bit of pi/2
401 * pio2_1t: pi/2 - pio2_1
402 * pio2_2: second 33 bit of pi/2
403 * pio2_2t: pi/2 - (pio2_1+pio2_2)
404 * pio2_3: third 33 bit of pi/2
405 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
406 */
407
408 static const double
409 zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
410 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
411 two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
412 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
413 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
414 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
415 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
416 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
417 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
418 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
419
420 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
421 double z,w,t,r,fn;
422 double tx[3];
423 int e0,i,j,nx,n,ix,hx,i0;
424
425 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */
426 hx = *(i0+(int*)&x); /* high word of x */
427 ix = hx&0x7fffffff;
428 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
429 {y[0] = x; y[1] = 0; return 0;}
430 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
431 if(hx>0) {
432 z = x - pio2_1;
433 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
434 y[0] = z - pio2_1t;
435 y[1] = (z-y[0])-pio2_1t;
436 } else { /* near pi/2, use 33+33+53 bit pi */
437 z -= pio2_2;
438 y[0] = z - pio2_2t;
439 y[1] = (z-y[0])-pio2_2t;
440 }
899 double y[2],z=0.0;
900 int n, ix;
901
902 /* High word of x. */
903 ix = __HI(x);
904
905 /* |x| ~< pi/4 */
906 ix &= 0x7fffffff;
907 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
908
909 /* tan(Inf or NaN) is NaN */
910 else if (ix>=0x7ff00000) return x-x; /* NaN */
911
912 /* argument reduction needed */
913 else {
914 n = __ieee754_rem_pio2(x,y);
915 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
916 -1 -- n odd */
917 }
918 JRT_END
919
920
921 #ifdef WIN32
922 # pragma optimize ( "", on )
923 #endif
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "prims/jni.h"
27 #include "runtime/interfaceSupport.hpp"
28 #include "runtime/sharedRuntime.hpp"
29 #include "runtime/sharedRuntimeMath.hpp"
30
31 // This file contains copies of the fdlibm routines used by
32 // StrictMath. It turns out that it is almost always required to use
33 // these runtime routines; the Intel CPU doesn't meet the Java
34 // specification for sin/cos outside a certain limited argument range,
35 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
36 // also turns out that avoiding the indirect call through function
37 // pointer out to libjava.so in SharedRuntime speeds these routines up
38 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
39
40 /*
41 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
42 * double x[],y[]; int e0,nx,prec; int ipio2[];
43 *
44 * __kernel_rem_pio2 return the last three digits of N with
45 * y = x - N*pi/2
46 * so that |y| < pi/2.
47 *
48 * The method is to compute the integer (mod 8) and fraction parts of
49 * (2/pi)*x without doing the full multiplication. In general we
50 * skip the part of the product that are known to be a huge integer (
51 * more accurately, = 0 mod 8 ). Thus the number of operations are
52 * independent of the exponent of the input.
53 *
54 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
55 *
56 * Input parameters:
57 * x[] The input value (must be positive) is broken into nx
58 * pieces of 24-bit integers in double precision format.
59 * x[i] will be the i-th 24 bit of x. The scaled exponent
156
157 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
158
159 static const double PIo2[] = {
160 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
161 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
162 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
163 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
164 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
165 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
166 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
167 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
168 };
169
170 static const double
171 zeroB = 0.0,
172 one = 1.0,
173 two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
174 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
175
176 static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
177 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
178 double z,fw,f[20],fq[20],q[20];
179
180 /* initialize jk*/
181 jk = init_jk[prec];
182 jp = jk;
183
184 /* determine jx,jv,q0, note that 3>q0 */
185 jx = nx-1;
186 jv = (e0-3)/24; if(jv<0) jv=0;
187 q0 = e0-24*(jv+1);
188
189 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
190 j = jv-jx; m = jx+jk;
191 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
192
193 /* compute q[0],q[1],...q[jk] */
194 for (i=0;i<=jk;i++) {
195 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
196 }
372 * pio2_1: first 33 bit of pi/2
373 * pio2_1t: pi/2 - pio2_1
374 * pio2_2: second 33 bit of pi/2
375 * pio2_2t: pi/2 - (pio2_1+pio2_2)
376 * pio2_3: third 33 bit of pi/2
377 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
378 */
379
380 static const double
381 zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
382 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
383 two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
384 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
385 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
386 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
387 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
388 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
389 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
390 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
391
392 static int __ieee754_rem_pio2(double x, double *y) {
393 double z,w,t,r,fn;
394 double tx[3];
395 int e0,i,j,nx,n,ix,hx,i0;
396
397 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */
398 hx = *(i0+(int*)&x); /* high word of x */
399 ix = hx&0x7fffffff;
400 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
401 {y[0] = x; y[1] = 0; return 0;}
402 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
403 if(hx>0) {
404 z = x - pio2_1;
405 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
406 y[0] = z - pio2_1t;
407 y[1] = (z-y[0])-pio2_1t;
408 } else { /* near pi/2, use 33+33+53 bit pi */
409 z -= pio2_2;
410 y[0] = z - pio2_2t;
411 y[1] = (z-y[0])-pio2_2t;
412 }
871 double y[2],z=0.0;
872 int n, ix;
873
874 /* High word of x. */
875 ix = __HI(x);
876
877 /* |x| ~< pi/4 */
878 ix &= 0x7fffffff;
879 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
880
881 /* tan(Inf or NaN) is NaN */
882 else if (ix>=0x7ff00000) return x-x; /* NaN */
883
884 /* argument reduction needed */
885 else {
886 n = __ieee754_rem_pio2(x,y);
887 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
888 -1 -- n odd */
889 }
890 JRT_END
|