1 /*
2 * Copyright (c) 1994, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
1459 * <p>Special Cases:
1460 * <ul>
1461 * <li> If the argument is NaN, then the result is NaN.
1462 * <li> If the argument is positive or negative infinity, then the
1463 * result is positive infinity.
1464 * <li> If the argument is positive or negative zero, then the result is
1465 * {@code Double.MIN_VALUE}.
1466 * <li> If the argument is ±{@code Double.MAX_VALUE}, then
1467 * the result is equal to 2<sup>971</sup>.
1468 * </ul>
1469 *
1470 * @param d the floating-point value whose ulp is to be returned
1471 * @return the size of an ulp of the argument
1472 * @author Joseph D. Darcy
1473 * @since 1.5
1474 */
1475 public static double ulp(double d) {
1476 int exp = getExponent(d);
1477
1478 switch(exp) {
1479 case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity
1480 return Math.abs(d);
1481
1482 case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal
1483 return Double.MIN_VALUE;
1484
1485 default:
1486 assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT;
1487
1488 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1489 exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1);
1490 if (exp >= DoubleConsts.MIN_EXPONENT) {
1491 return powerOfTwoD(exp);
1492 }
1493 else {
1494 // return a subnormal result; left shift integer
1495 // representation of Double.MIN_VALUE appropriate
1496 // number of positions
1497 return Double.longBitsToDouble(1L <<
1498 (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) ));
1499 }
1500 }
1501 }
1502
1503 /**
1504 * Returns the size of an ulp of the argument. An ulp, unit in
1505 * the last place, of a {@code float} value is the positive
1506 * distance between this floating-point value and the {@code
1507 * float} value next larger in magnitude. Note that for non-NaN
1508 * <i>x</i>, <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
1509 *
1510 * <p>Special Cases:
1511 * <ul>
1512 * <li> If the argument is NaN, then the result is NaN.
1513 * <li> If the argument is positive or negative infinity, then the
1514 * result is positive infinity.
1515 * <li> If the argument is positive or negative zero, then the result is
1516 * {@code Float.MIN_VALUE}.
1517 * <li> If the argument is ±{@code Float.MAX_VALUE}, then
1518 * the result is equal to 2<sup>104</sup>.
1519 * </ul>
1520 *
1521 * @param f the floating-point value whose ulp is to be returned
1522 * @return the size of an ulp of the argument
1523 * @author Joseph D. Darcy
1524 * @since 1.5
1525 */
1526 public static float ulp(float f) {
1527 int exp = getExponent(f);
1528
1529 switch(exp) {
1530 case FloatConsts.MAX_EXPONENT+1: // NaN or infinity
1531 return Math.abs(f);
1532
1533 case FloatConsts.MIN_EXPONENT-1: // zero or subnormal
1534 return FloatConsts.MIN_VALUE;
1535
1536 default:
1537 assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT;
1538
1539 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1540 exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1);
1541 if (exp >= FloatConsts.MIN_EXPONENT) {
1542 return powerOfTwoF(exp);
1543 }
1544 else {
1545 // return a subnormal result; left shift integer
1546 // representation of FloatConsts.MIN_VALUE appropriate
1547 // number of positions
1548 return Float.intBitsToFloat(1 <<
1549 (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) ));
1550 }
1551 }
1552 }
1553
1554 /**
1555 * Returns the signum function of the argument; zero if the argument
1556 * is zero, 1.0 if the argument is greater than zero, -1.0 if the
1557 * argument is less than zero.
1558 *
1559 * <p>Special Cases:
1560 * <ul>
1561 * <li> If the argument is NaN, then the result is NaN.
1562 * <li> If the argument is positive zero or negative zero, then the
1563 * result is the same as the argument.
1564 * </ul>
1565 *
1566 * @param d the floating-point value whose signum is to be returned
1567 * @return the signum function of the argument
1568 * @author Joseph D. Darcy
1569 * @since 1.5
2259 * set.
2260 *
2261 * It is _not_ a valid implementation to first multiply d by
2262 * 2^MIN_EXPONENT and then by 2 ^ (scaleFactor %
2263 * MIN_EXPONENT) since even in a strictfp program double
2264 * rounding on underflow could occur; e.g. if the scaleFactor
2265 * argument was (MIN_EXPONENT - n) and the exponent of d was a
2266 * little less than -(MIN_EXPONENT - n), meaning the final
2267 * result would be subnormal.
2268 *
2269 * Since exact reproducibility of this method can be achieved
2270 * without any undue performance burden, there is no
2271 * compelling reason to allow double rounding on underflow in
2272 * scalb.
2273 */
2274
2275 // magnitude of a power of two so large that scaling a finite
2276 // nonzero value by it would be guaranteed to over or
2277 // underflow; due to rounding, scaling down takes an
2278 // additional power of two which is reflected here
2279 final int MAX_SCALE = DoubleConsts.MAX_EXPONENT + -DoubleConsts.MIN_EXPONENT +
2280 DoubleConsts.SIGNIFICAND_WIDTH + 1;
2281 int exp_adjust = 0;
2282 int scale_increment = 0;
2283 double exp_delta = Double.NaN;
2284
2285 // Make sure scaling factor is in a reasonable range
2286
2287 if(scaleFactor < 0) {
2288 scaleFactor = Math.max(scaleFactor, -MAX_SCALE);
2289 scale_increment = -512;
2290 exp_delta = twoToTheDoubleScaleDown;
2291 }
2292 else {
2293 scaleFactor = Math.min(scaleFactor, MAX_SCALE);
2294 scale_increment = 512;
2295 exp_delta = twoToTheDoubleScaleUp;
2296 }
2297
2298 // Calculate (scaleFactor % +/-512), 512 = 2^9, using
2299 // technique from "Hacker's Delight" section 10-2.
2328 *
2329 * <p>Special cases:
2330 * <ul>
2331 * <li> If the first argument is NaN, NaN is returned.
2332 * <li> If the first argument is infinite, then an infinity of the
2333 * same sign is returned.
2334 * <li> If the first argument is zero, then a zero of the same
2335 * sign is returned.
2336 * </ul>
2337 *
2338 * @param f number to be scaled by a power of two.
2339 * @param scaleFactor power of 2 used to scale {@code f}
2340 * @return {@code f} × 2<sup>{@code scaleFactor}</sup>
2341 * @since 1.6
2342 */
2343 public static float scalb(float f, int scaleFactor) {
2344 // magnitude of a power of two so large that scaling a finite
2345 // nonzero value by it would be guaranteed to over or
2346 // underflow; due to rounding, scaling down takes an
2347 // additional power of two which is reflected here
2348 final int MAX_SCALE = FloatConsts.MAX_EXPONENT + -FloatConsts.MIN_EXPONENT +
2349 FloatConsts.SIGNIFICAND_WIDTH + 1;
2350
2351 // Make sure scaling factor is in a reasonable range
2352 scaleFactor = Math.max(Math.min(scaleFactor, MAX_SCALE), -MAX_SCALE);
2353
2354 /*
2355 * Since + MAX_SCALE for float fits well within the double
2356 * exponent range and + float -> double conversion is exact
2357 * the multiplication below will be exact. Therefore, the
2358 * rounding that occurs when the double product is cast to
2359 * float will be the correctly rounded float result. Since
2360 * all operations other than the final multiply will be exact,
2361 * it is not necessary to declare this method strictfp.
2362 */
2363 return (float)((double)f*powerOfTwoD(scaleFactor));
2364 }
2365
2366 // Constants used in scalb
2367 static double twoToTheDoubleScaleUp = powerOfTwoD(512);
2368 static double twoToTheDoubleScaleDown = powerOfTwoD(-512);
2369
2370 /**
2371 * Returns a floating-point power of two in the normal range.
2372 */
2373 static double powerOfTwoD(int n) {
2374 assert(n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT);
2375 return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) <<
2376 (DoubleConsts.SIGNIFICAND_WIDTH-1))
2377 & DoubleConsts.EXP_BIT_MASK);
2378 }
2379
2380 /**
2381 * Returns a floating-point power of two in the normal range.
2382 */
2383 static float powerOfTwoF(int n) {
2384 assert(n >= FloatConsts.MIN_EXPONENT && n <= FloatConsts.MAX_EXPONENT);
2385 return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) <<
2386 (FloatConsts.SIGNIFICAND_WIDTH-1))
2387 & FloatConsts.EXP_BIT_MASK);
2388 }
2389 }
|
1 /*
2 * Copyright (c) 1994, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
1459 * <p>Special Cases:
1460 * <ul>
1461 * <li> If the argument is NaN, then the result is NaN.
1462 * <li> If the argument is positive or negative infinity, then the
1463 * result is positive infinity.
1464 * <li> If the argument is positive or negative zero, then the result is
1465 * {@code Double.MIN_VALUE}.
1466 * <li> If the argument is ±{@code Double.MAX_VALUE}, then
1467 * the result is equal to 2<sup>971</sup>.
1468 * </ul>
1469 *
1470 * @param d the floating-point value whose ulp is to be returned
1471 * @return the size of an ulp of the argument
1472 * @author Joseph D. Darcy
1473 * @since 1.5
1474 */
1475 public static double ulp(double d) {
1476 int exp = getExponent(d);
1477
1478 switch(exp) {
1479 case Double.MAX_EXPONENT + 1: // NaN or infinity
1480 return Math.abs(d);
1481
1482 case Double.MIN_EXPONENT - 1: // zero or subnormal
1483 return Double.MIN_VALUE;
1484
1485 default:
1486 assert exp <= Double.MAX_EXPONENT && exp >= Double.MIN_EXPONENT;
1487
1488 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1489 exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1);
1490 if (exp >= Double.MIN_EXPONENT) {
1491 return powerOfTwoD(exp);
1492 }
1493 else {
1494 // return a subnormal result; left shift integer
1495 // representation of Double.MIN_VALUE appropriate
1496 // number of positions
1497 return Double.longBitsToDouble(1L <<
1498 (exp - (Double.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) ));
1499 }
1500 }
1501 }
1502
1503 /**
1504 * Returns the size of an ulp of the argument. An ulp, unit in
1505 * the last place, of a {@code float} value is the positive
1506 * distance between this floating-point value and the {@code
1507 * float} value next larger in magnitude. Note that for non-NaN
1508 * <i>x</i>, <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
1509 *
1510 * <p>Special Cases:
1511 * <ul>
1512 * <li> If the argument is NaN, then the result is NaN.
1513 * <li> If the argument is positive or negative infinity, then the
1514 * result is positive infinity.
1515 * <li> If the argument is positive or negative zero, then the result is
1516 * {@code Float.MIN_VALUE}.
1517 * <li> If the argument is ±{@code Float.MAX_VALUE}, then
1518 * the result is equal to 2<sup>104</sup>.
1519 * </ul>
1520 *
1521 * @param f the floating-point value whose ulp is to be returned
1522 * @return the size of an ulp of the argument
1523 * @author Joseph D. Darcy
1524 * @since 1.5
1525 */
1526 public static float ulp(float f) {
1527 int exp = getExponent(f);
1528
1529 switch(exp) {
1530 case Float.MAX_EXPONENT+1: // NaN or infinity
1531 return Math.abs(f);
1532
1533 case Float.MIN_EXPONENT-1: // zero or subnormal
1534 return Float.MIN_VALUE;
1535
1536 default:
1537 assert exp <= Float.MAX_EXPONENT && exp >= Float.MIN_EXPONENT;
1538
1539 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1540 exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1);
1541 if (exp >= Float.MIN_EXPONENT) {
1542 return powerOfTwoF(exp);
1543 } else {
1544 // return a subnormal result; left shift integer
1545 // representation of FloatConsts.MIN_VALUE appropriate
1546 // number of positions
1547 return Float.intBitsToFloat(1 <<
1548 (exp - (Float.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) ));
1549 }
1550 }
1551 }
1552
1553 /**
1554 * Returns the signum function of the argument; zero if the argument
1555 * is zero, 1.0 if the argument is greater than zero, -1.0 if the
1556 * argument is less than zero.
1557 *
1558 * <p>Special Cases:
1559 * <ul>
1560 * <li> If the argument is NaN, then the result is NaN.
1561 * <li> If the argument is positive zero or negative zero, then the
1562 * result is the same as the argument.
1563 * </ul>
1564 *
1565 * @param d the floating-point value whose signum is to be returned
1566 * @return the signum function of the argument
1567 * @author Joseph D. Darcy
1568 * @since 1.5
2258 * set.
2259 *
2260 * It is _not_ a valid implementation to first multiply d by
2261 * 2^MIN_EXPONENT and then by 2 ^ (scaleFactor %
2262 * MIN_EXPONENT) since even in a strictfp program double
2263 * rounding on underflow could occur; e.g. if the scaleFactor
2264 * argument was (MIN_EXPONENT - n) and the exponent of d was a
2265 * little less than -(MIN_EXPONENT - n), meaning the final
2266 * result would be subnormal.
2267 *
2268 * Since exact reproducibility of this method can be achieved
2269 * without any undue performance burden, there is no
2270 * compelling reason to allow double rounding on underflow in
2271 * scalb.
2272 */
2273
2274 // magnitude of a power of two so large that scaling a finite
2275 // nonzero value by it would be guaranteed to over or
2276 // underflow; due to rounding, scaling down takes an
2277 // additional power of two which is reflected here
2278 final int MAX_SCALE = Double.MAX_EXPONENT + -Double.MIN_EXPONENT +
2279 DoubleConsts.SIGNIFICAND_WIDTH + 1;
2280 int exp_adjust = 0;
2281 int scale_increment = 0;
2282 double exp_delta = Double.NaN;
2283
2284 // Make sure scaling factor is in a reasonable range
2285
2286 if(scaleFactor < 0) {
2287 scaleFactor = Math.max(scaleFactor, -MAX_SCALE);
2288 scale_increment = -512;
2289 exp_delta = twoToTheDoubleScaleDown;
2290 }
2291 else {
2292 scaleFactor = Math.min(scaleFactor, MAX_SCALE);
2293 scale_increment = 512;
2294 exp_delta = twoToTheDoubleScaleUp;
2295 }
2296
2297 // Calculate (scaleFactor % +/-512), 512 = 2^9, using
2298 // technique from "Hacker's Delight" section 10-2.
2327 *
2328 * <p>Special cases:
2329 * <ul>
2330 * <li> If the first argument is NaN, NaN is returned.
2331 * <li> If the first argument is infinite, then an infinity of the
2332 * same sign is returned.
2333 * <li> If the first argument is zero, then a zero of the same
2334 * sign is returned.
2335 * </ul>
2336 *
2337 * @param f number to be scaled by a power of two.
2338 * @param scaleFactor power of 2 used to scale {@code f}
2339 * @return {@code f} × 2<sup>{@code scaleFactor}</sup>
2340 * @since 1.6
2341 */
2342 public static float scalb(float f, int scaleFactor) {
2343 // magnitude of a power of two so large that scaling a finite
2344 // nonzero value by it would be guaranteed to over or
2345 // underflow; due to rounding, scaling down takes an
2346 // additional power of two which is reflected here
2347 final int MAX_SCALE = Float.MAX_EXPONENT + -Float.MIN_EXPONENT +
2348 FloatConsts.SIGNIFICAND_WIDTH + 1;
2349
2350 // Make sure scaling factor is in a reasonable range
2351 scaleFactor = Math.max(Math.min(scaleFactor, MAX_SCALE), -MAX_SCALE);
2352
2353 /*
2354 * Since + MAX_SCALE for float fits well within the double
2355 * exponent range and + float -> double conversion is exact
2356 * the multiplication below will be exact. Therefore, the
2357 * rounding that occurs when the double product is cast to
2358 * float will be the correctly rounded float result. Since
2359 * all operations other than the final multiply will be exact,
2360 * it is not necessary to declare this method strictfp.
2361 */
2362 return (float)((double)f*powerOfTwoD(scaleFactor));
2363 }
2364
2365 // Constants used in scalb
2366 static double twoToTheDoubleScaleUp = powerOfTwoD(512);
2367 static double twoToTheDoubleScaleDown = powerOfTwoD(-512);
2368
2369 /**
2370 * Returns a floating-point power of two in the normal range.
2371 */
2372 static double powerOfTwoD(int n) {
2373 assert(n >= Double.MIN_EXPONENT && n <= Double.MAX_EXPONENT);
2374 return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) <<
2375 (DoubleConsts.SIGNIFICAND_WIDTH-1))
2376 & DoubleConsts.EXP_BIT_MASK);
2377 }
2378
2379 /**
2380 * Returns a floating-point power of two in the normal range.
2381 */
2382 static float powerOfTwoF(int n) {
2383 assert(n >= Float.MIN_EXPONENT && n <= Float.MAX_EXPONENT);
2384 return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) <<
2385 (FloatConsts.SIGNIFICAND_WIDTH-1))
2386 & FloatConsts.EXP_BIT_MASK);
2387 }
2388 }
|