< prev index next >

src/java.base/share/classes/java/lang/Math.java

Special Cases: 1460 *

1461 *
• If the argument is NaN, then the result is NaN. 1462 *
• If the argument is positive or negative infinity, then the 1463 * result is positive infinity. 1464 *
• If the argument is positive or negative zero, then the result is 1465 * {@code Double.MIN_VALUE}. 1466 *
• If the argument is ±{@code Double.MAX_VALUE}, then 1467 * the result is equal to 2971. 1468 *
1469 * 1470 * @param d the floating-point value whose ulp is to be returned 1471 * @return the size of an ulp of the argument 1472 * @author Joseph D. Darcy 1473 * @since 1.5 1474 */ 1475 public static double ulp(double d) { 1476 int exp = getExponent(d); 1477 1478 switch(exp) { 1479 case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity 1480 return Math.abs(d); 1481 1482 case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal 1483 return Double.MIN_VALUE; 1484 1485 default: 1486 assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT; 1487 1488 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) 1489 exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1); 1490 if (exp >= DoubleConsts.MIN_EXPONENT) { 1491 return powerOfTwoD(exp); 1492 } 1493 else { 1494 // return a subnormal result; left shift integer 1495 // representation of Double.MIN_VALUE appropriate 1496 // number of positions 1497 return Double.longBitsToDouble(1L << 1498 (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) )); 1499 } 1500 } 1501 } 1502 1503 /** 1504 * Returns the size of an ulp of the argument. An ulp, unit in 1505 * the last place, of a {@code float} value is the positive 1506 * distance between this floating-point value and the {@code 1507 * float} value next larger in magnitude. Note that for non-NaN 1508 * x, ulp(-x) == ulp(x). 1509 * 1510 *

Special Cases: 1511 *

1512 *
• If the argument is NaN, then the result is NaN. 1513 *
• If the argument is positive or negative infinity, then the 1514 * result is positive infinity. 1515 *
• If the argument is positive or negative zero, then the result is 1516 * {@code Float.MIN_VALUE}. 1517 *
• If the argument is ±{@code Float.MAX_VALUE}, then 1518 * the result is equal to 2104. 1519 *
1520 * 1521 * @param f the floating-point value whose ulp is to be returned 1522 * @return the size of an ulp of the argument 1523 * @author Joseph D. Darcy 1524 * @since 1.5 1525 */ 1526 public static float ulp(float f) { 1527 int exp = getExponent(f); 1528 1529 switch(exp) { 1530 case FloatConsts.MAX_EXPONENT+1: // NaN or infinity 1531 return Math.abs(f); 1532 1533 case FloatConsts.MIN_EXPONENT-1: // zero or subnormal 1534 return FloatConsts.MIN_VALUE; 1535 1536 default: 1537 assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT; 1538 1539 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) 1540 exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1); 1541 if (exp >= FloatConsts.MIN_EXPONENT) { 1542 return powerOfTwoF(exp); 1543 } 1544 else { 1545 // return a subnormal result; left shift integer 1546 // representation of FloatConsts.MIN_VALUE appropriate 1547 // number of positions 1548 return Float.intBitsToFloat(1 << 1549 (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) )); 1550 } 1551 } 1552 } 1553 1554 /** 1555 * Returns the signum function of the argument; zero if the argument 1556 * is zero, 1.0 if the argument is greater than zero, -1.0 if the 1557 * argument is less than zero. 1558 * 1559 *

Special Cases: 1560 *

1561 *
• If the argument is NaN, then the result is NaN. 1562 *
• If the argument is positive zero or negative zero, then the 1563 * result is the same as the argument. 1564 *
1565 * 1566 * @param d the floating-point value whose signum is to be returned 1567 * @return the signum function of the argument 1568 * @author Joseph D. Darcy 1569 * @since 1.5 2259 * set. 2260 * 2261 * It is _not_ a valid implementation to first multiply d by 2262 * 2^MIN_EXPONENT and then by 2 ^ (scaleFactor % 2263 * MIN_EXPONENT) since even in a strictfp program double 2264 * rounding on underflow could occur; e.g. if the scaleFactor 2265 * argument was (MIN_EXPONENT - n) and the exponent of d was a 2266 * little less than -(MIN_EXPONENT - n), meaning the final 2267 * result would be subnormal. 2268 * 2269 * Since exact reproducibility of this method can be achieved 2270 * without any undue performance burden, there is no 2271 * compelling reason to allow double rounding on underflow in 2272 * scalb. 2273 */ 2274 2275 // magnitude of a power of two so large that scaling a finite 2276 // nonzero value by it would be guaranteed to over or 2277 // underflow; due to rounding, scaling down takes an 2278 // additional power of two which is reflected here 2279 final int MAX_SCALE = DoubleConsts.MAX_EXPONENT + -DoubleConsts.MIN_EXPONENT + 2280 DoubleConsts.SIGNIFICAND_WIDTH + 1; 2281 int exp_adjust = 0; 2282 int scale_increment = 0; 2283 double exp_delta = Double.NaN; 2284 2285 // Make sure scaling factor is in a reasonable range 2286 2287 if(scaleFactor < 0) { 2288 scaleFactor = Math.max(scaleFactor, -MAX_SCALE); 2289 scale_increment = -512; 2290 exp_delta = twoToTheDoubleScaleDown; 2291 } 2292 else { 2293 scaleFactor = Math.min(scaleFactor, MAX_SCALE); 2294 scale_increment = 512; 2295 exp_delta = twoToTheDoubleScaleUp; 2296 } 2297 2298 // Calculate (scaleFactor % +/-512), 512 = 2^9, using 2299 // technique from "Hacker's Delight" section 10-2. 2328 * 2329 *

Special cases: 2330 *

2331 *
• If the first argument is NaN, NaN is returned. 2332 *
• If the first argument is infinite, then an infinity of the 2333 * same sign is returned. 2334 *
• If the first argument is zero, then a zero of the same 2335 * sign is returned. 2336 *

Special Cases: 1460 *

1461 *
• If the argument is NaN, then the result is NaN. 1462 *
• If the argument is positive or negative infinity, then the 1463 * result is positive infinity. 1464 *
• If the argument is positive or negative zero, then the result is 1465 * {@code Double.MIN_VALUE}. 1466 *
• If the argument is ±{@code Double.MAX_VALUE}, then 1467 * the result is equal to 2971. 1468 *
1469 * 1470 * @param d the floating-point value whose ulp is to be returned 1471 * @return the size of an ulp of the argument 1472 * @author Joseph D. Darcy 1473 * @since 1.5 1474 */ 1475 public static double ulp(double d) { 1476 int exp = getExponent(d); 1477 1478 switch(exp) { 1479 case Double.MAX_EXPONENT + 1: // NaN or infinity 1480 return Math.abs(d); 1481 1482 case Double.MIN_EXPONENT - 1: // zero or subnormal 1483 return Double.MIN_VALUE; 1484 1485 default: 1486 assert exp <= Double.MAX_EXPONENT && exp >= Double.MIN_EXPONENT; 1487 1488 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) 1489 exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1); 1490 if (exp >= Double.MIN_EXPONENT) { 1491 return powerOfTwoD(exp); 1492 } 1493 else { 1494 // return a subnormal result; left shift integer 1495 // representation of Double.MIN_VALUE appropriate 1496 // number of positions 1497 return Double.longBitsToDouble(1L << 1498 (exp - (Double.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) )); 1499 } 1500 } 1501 } 1502 1503 /** 1504 * Returns the size of an ulp of the argument. An ulp, unit in 1505 * the last place, of a {@code float} value is the positive 1506 * distance between this floating-point value and the {@code 1507 * float} value next larger in magnitude. Note that for non-NaN 1508 * x, ulp(-x) == ulp(x). 1509 * 1510 *

Special Cases: 1511 *

1512 *
• If the argument is NaN, then the result is NaN. 1513 *
• If the argument is positive or negative infinity, then the 1514 * result is positive infinity. 1515 *
• If the argument is positive or negative zero, then the result is 1516 * {@code Float.MIN_VALUE}. 1517 *
• If the argument is ±{@code Float.MAX_VALUE}, then 1518 * the result is equal to 2104. 1519 *
1520 * 1521 * @param f the floating-point value whose ulp is to be returned 1522 * @return the size of an ulp of the argument 1523 * @author Joseph D. Darcy 1524 * @since 1.5 1525 */ 1526 public static float ulp(float f) { 1527 int exp = getExponent(f); 1528 1529 switch(exp) { 1530 case Float.MAX_EXPONENT+1: // NaN or infinity 1531 return Math.abs(f); 1532 1533 case Float.MIN_EXPONENT-1: // zero or subnormal 1534 return Float.MIN_VALUE; 1535 1536 default: 1537 assert exp <= Float.MAX_EXPONENT && exp >= Float.MIN_EXPONENT; 1538 1539 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) 1540 exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1); 1541 if (exp >= Float.MIN_EXPONENT) { 1542 return powerOfTwoF(exp); 1543 } else { 1544 // return a subnormal result; left shift integer 1545 // representation of FloatConsts.MIN_VALUE appropriate 1546 // number of positions 1547 return Float.intBitsToFloat(1 << 1548 (exp - (Float.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) )); 1549 } 1550 } 1551 } 1552 1553 /** 1554 * Returns the signum function of the argument; zero if the argument 1555 * is zero, 1.0 if the argument is greater than zero, -1.0 if the 1556 * argument is less than zero. 1557 * 1558 *

Special Cases: 1559 *

1560 *
• If the argument is NaN, then the result is NaN. 1561 *
• If the argument is positive zero or negative zero, then the 1562 * result is the same as the argument. 1563 *
1564 * 1565 * @param d the floating-point value whose signum is to be returned 1566 * @return the signum function of the argument 1567 * @author Joseph D. Darcy 1568 * @since 1.5 2258 * set. 2259 * 2260 * It is _not_ a valid implementation to first multiply d by 2261 * 2^MIN_EXPONENT and then by 2 ^ (scaleFactor % 2262 * MIN_EXPONENT) since even in a strictfp program double 2263 * rounding on underflow could occur; e.g. if the scaleFactor 2264 * argument was (MIN_EXPONENT - n) and the exponent of d was a 2265 * little less than -(MIN_EXPONENT - n), meaning the final 2266 * result would be subnormal. 2267 * 2268 * Since exact reproducibility of this method can be achieved 2269 * without any undue performance burden, there is no 2270 * compelling reason to allow double rounding on underflow in 2271 * scalb. 2272 */ 2273 2274 // magnitude of a power of two so large that scaling a finite 2275 // nonzero value by it would be guaranteed to over or 2276 // underflow; due to rounding, scaling down takes an 2277 // additional power of two which is reflected here 2278 final int MAX_SCALE = Double.MAX_EXPONENT + -Double.MIN_EXPONENT + 2279 DoubleConsts.SIGNIFICAND_WIDTH + 1; 2280 int exp_adjust = 0; 2281 int scale_increment = 0; 2282 double exp_delta = Double.NaN; 2283 2284 // Make sure scaling factor is in a reasonable range 2285 2286 if(scaleFactor < 0) { 2287 scaleFactor = Math.max(scaleFactor, -MAX_SCALE); 2288 scale_increment = -512; 2289 exp_delta = twoToTheDoubleScaleDown; 2290 } 2291 else { 2292 scaleFactor = Math.min(scaleFactor, MAX_SCALE); 2293 scale_increment = 512; 2294 exp_delta = twoToTheDoubleScaleUp; 2295 } 2296 2297 // Calculate (scaleFactor % +/-512), 512 = 2^9, using 2298 // technique from "Hacker's Delight" section 10-2. 2327 * 2328 *

Special cases: 2329 *

2330 *
• If the first argument is NaN, NaN is returned. 2331 *
• If the first argument is infinite, then an infinity of the 2332 * same sign is returned. 2333 *
• If the first argument is zero, then a zero of the same 2334 * sign is returned. 2335 *
2336 * 2337 * @param f number to be scaled by a power of two. 2338 * @param scaleFactor power of 2 used to scale {@code f} 2339 * @return {@code f} × 2{@code scaleFactor} 2340 * @since 1.6 2341 */ 2342 public static float scalb(float f, int scaleFactor) { 2343 // magnitude of a power of two so large that scaling a finite 2344 // nonzero value by it would be guaranteed to over or 2345 // underflow; due to rounding, scaling down takes an 2346 // additional power of two which is reflected here 2347 final int MAX_SCALE = Float.MAX_EXPONENT + -Float.MIN_EXPONENT + 2348 FloatConsts.SIGNIFICAND_WIDTH + 1; 2349 2350 // Make sure scaling factor is in a reasonable range 2351 scaleFactor = Math.max(Math.min(scaleFactor, MAX_SCALE), -MAX_SCALE); 2352 2353 /* 2354 * Since + MAX_SCALE for float fits well within the double 2355 * exponent range and + float -> double conversion is exact 2356 * the multiplication below will be exact. Therefore, the 2357 * rounding that occurs when the double product is cast to 2358 * float will be the correctly rounded float result. Since 2359 * all operations other than the final multiply will be exact, 2360 * it is not necessary to declare this method strictfp. 2361 */ 2362 return (float)((double)f*powerOfTwoD(scaleFactor)); 2363 } 2364 2365 // Constants used in scalb 2366 static double twoToTheDoubleScaleUp = powerOfTwoD(512); 2367 static double twoToTheDoubleScaleDown = powerOfTwoD(-512); 2368 2369 /** 2370 * Returns a floating-point power of two in the normal range. 2371 */ 2372 static double powerOfTwoD(int n) { 2373 assert(n >= Double.MIN_EXPONENT && n <= Double.MAX_EXPONENT); 2374 return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) << 2375 (DoubleConsts.SIGNIFICAND_WIDTH-1)) 2376 & DoubleConsts.EXP_BIT_MASK); 2377 } 2378 2379 /** 2380 * Returns a floating-point power of two in the normal range. 2381 */ 2382 static float powerOfTwoF(int n) { 2383 assert(n >= Float.MIN_EXPONENT && n <= Float.MAX_EXPONENT); 2384 return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) << 2385 (FloatConsts.SIGNIFICAND_WIDTH-1)) 2386 & FloatConsts.EXP_BIT_MASK); 2387 } 2388 }
< prev index next >